binius_utils/
graph.rs

1// Copyright 2024-2025 Irreducible Inc.
2
3use std::cmp::Ordering;
4
5/// Finds connected components using a Kruskal-like approach.
6/// Each input slice of usizes represents a set of nodes that form a complete subgraph
7/// (i.e., all of them are connected).
8///
9/// Returns a vector where each index corresponds to a graph node index and the value is the
10/// identifier of the connected component (the minimal node in that component).
11///
12/// ```
13/// use binius_utils::graph::connected_components;
14/// assert_eq!(connected_components(&[]), vec![]);
15/// assert_eq!(connected_components(&[&[0], &[1]]), vec![0, 1]);
16/// assert_eq!(connected_components(&[&[0, 1], &[1, 2], &[2, 3], &[4]]), vec![0, 0, 0, 0, 4]);
17/// assert_eq!(
18///     connected_components(&[&[0, 1, 2], &[5, 6, 7, 8], &[9], &[2, 3, 9]]),
19///     vec![0, 0, 0, 0, 4, 5, 5, 5, 5, 0]
20/// );
21/// ```
22pub fn connected_components<T: AsRef<[impl AsRef<[usize]>]>>(data: T) -> Vec<usize> {
23	let data = data.as_ref();
24	if data.is_empty() {
25		return vec![];
26	}
27
28	// Determine the maximum node ID
29	let max_id = *data
30		.iter()
31		.flat_map(|ids| ids.as_ref().iter())
32		.max()
33		.unwrap_or(&0);
34
35	let n = max_id + 1;
36	let mut uf = UnionFind::new(n);
37
38	// Convert input sets into edges. For each chunk, we connect all nodes together.
39	// To avoid adding a large number of redundant edges (fully connecting each subset),
40	// we can simply connect each node in the subset to the minimum node in that subset.
41	// This still ensures they all become part of one connected component.
42	for ids in data {
43		let ids = ids.as_ref();
44		if ids.len() > 1 {
45			let &base = ids.iter().min().unwrap();
46			for &node in ids {
47				if node != base {
48					uf.union(base, node);
49				}
50			}
51		}
52	}
53
54	// After union-find is complete, each node can be mapped to the minimum element
55	// of its component.
56	let mut result = vec![0; n];
57	for (i, res) in result.iter_mut().enumerate() {
58		let root = uf.find(i);
59		// Use the stored minimum element for the root
60		*res = uf.min_element[root];
61	}
62
63	result
64}
65
66struct UnionFind {
67	parent: Vec<usize>,
68	rank: Vec<usize>,
69	min_element: Vec<usize>,
70}
71
72impl UnionFind {
73	fn new(n: usize) -> Self {
74		Self {
75			parent: (0..n).collect(),
76			rank: vec![0; n],
77			min_element: (0..n).collect(),
78		}
79	}
80
81	fn find(&mut self, x: usize) -> usize {
82		if self.parent[x] != x {
83			self.parent[x] = self.find(self.parent[x]);
84		}
85		self.parent[x]
86	}
87
88	fn union(&mut self, x: usize, y: usize) {
89		let rx = self.find(x);
90		let ry = self.find(y);
91
92		if rx != ry {
93			// Union by rank, but also maintain the minimal element in the representative
94			let min_element = self.min_element[rx].min(self.min_element[ry]);
95			match self.rank[rx].cmp(&self.rank[ry]) {
96				Ordering::Less => {
97					self.parent[rx] = ry;
98					self.min_element[ry] = min_element;
99				}
100				Ordering::Greater => {
101					self.parent[ry] = rx;
102					self.min_element[rx] = min_element;
103				}
104				Ordering::Equal => {
105					self.parent[ry] = rx;
106					self.min_element[rx] = min_element;
107					self.rank[rx] += 1;
108				}
109			}
110		}
111	}
112}