import abc
import networkx as nx
from heapdict import heapdict
from igraph import Graph
from networkx.algorithms import approximation as approx
[docs]
class MaxIndependentSetCalc(abc.ABC):
[docs]
@abc.abstractmethod
def find_max_set(
self, num_texts: int, similar_pairs: list[tuple[int, int]]
) -> set[int]:
pass
[docs]
class OptimalIndependentSetCalc(MaxIndependentSetCalc):
[docs]
def find_max_set(
self, num_texts: int, similar_pairs: list[tuple[int, int]]
) -> set[int]:
"""
gets the maximum independent set of a graph
:param num_texts:
:param similar_pairs:
:return:
"""
g = Graph()
g.add_vertices(num_texts)
g.add_edges(similar_pairs)
independent_sets: list[tuple[int]] = g.largest_independent_vertex_sets()
return set(sorted(independent_sets)[0]) if independent_sets else set()
[docs]
class ApproximateIndependentSetCalc(MaxIndependentSetCalc):
[docs]
def find_max_set(
self, num_texts: int, similar_pairs: list[tuple[int, int]]
) -> set[int]:
"""
gets the approximate maximum independent set of a graph
:param num_texts:
:param similar_pairs:
:return:
"""
graph = nx.Graph()
graph.add_nodes_from(range(num_texts))
# Add edges representing similarity between texts
graph.add_edges_from(similar_pairs)
return set(approx.maximum_independent_set(graph))
[docs]
class GreedyIndependentSetCalc(MaxIndependentSetCalc):
[docs]
def find_max_set(
self, num_texts: int, similar_pairs: list[tuple[int, int]]
) -> set[int]:
"""
Greedy algorithm to find an approximate maximum independent set by iteratively removing
the node with the highest degree and its edges.
:param num_texts: Number of vertices (texts)
:param similar_pairs: List of edges representing pairs of similar texts
:return: Set of vertices in the approximate maximum independent set
"""
graph = nx.Graph()
graph.add_nodes_from(range(num_texts))
graph.add_edges_from(similar_pairs)
independent_set = set()
degree_heap = heapdict()
# Initialize the heap with node degrees (negated for max-heap behavior)
for node in graph.nodes:
degree_heap[node] = -len(list(graph.neighbors(node)))
while graph.edges:
# Get the node with the most edges (highest degree)
node = degree_heap.popitem()[0]
neighbors = list(graph.neighbors(node))
# Remove node and its edges from the graph
graph.remove_node(node)
# noinspection PyTypeChecker
for neighbor in neighbors:
if neighbor in degree_heap:
degree_heap[neighbor] += 1
for node in graph.nodes:
independent_set.add(node)
return independent_set