Skip to content

mis.data.dataloader

[docs] module mis.data.dataloader

import pandas as pd
import networkx as nx
from mis import MISInstance
from geopy.distance import geodesic
from pathlib import Path


class DataLoader:
    """
    DataLoader class to handle loading of coordinates from CSV files,
    calculating distances between coordinates, and building a Maximum Independent Set (MIS) instance.

    Attributes:
        coordinates_dataset (list[tuple[float, float]]): A list of tuples representing coordinates (latitude, longitude).
    """

    coordinates_dataset: list[tuple[float, float]]

    @staticmethod
    def distance_from_coordinates(
        coord1: tuple[float, float], coord2: tuple[float, float]
    ) -> float:
        """
        Calculate the distance between two geodesic coordinates.

        Args:
            coord1 (tuple[float, float]): The first coordinate as a tuple (latitude, longitude).
            coord2 (tuple[float, float]): The second coordinate as a tuple (latitude, longitude).

        Returns:
            float: The distance between the two coordinates in kilometers.
        """
        return float(geodesic(coord1, coord2).km)

    def load_from_csv_coordinates(self, file_path: Path) -> None:
        """
        Load coordinates from a CSV file.
        The CSV file should have a column named 'coordonnees' with coordinates in the format "lat,lon".

        Args:
            file_path (Path): The path to the CSV file containing coordinates.
        """
        # error handling with a try-except block
        if not file_path.exists():
            raise FileNotFoundError(f"The file {file_path} does not exist.")
        if not file_path.is_file():
            raise ValueError(f"The path {file_path} is not a file.")
        if not file_path.suffix == ".csv":
            raise ValueError(f"The file {file_path} is not a CSV file.")

        df = pd.read_csv(file_path, sep=";")

        if "coordonnees" not in df.columns:
            raise ValueError("The CSV file must contain a 'coordonnees' column.")
        if df.empty:
            raise ValueError("The CSV file is empty.")
        if df["coordonnees"].isnull().any():
            raise ValueError("The 'coordonnees' column contains null values.")
        if not all(df["coordonnees"].apply(lambda x: len(x.split(",")) == 2)):
            raise ValueError(
                "All entries in the 'coordonnees' column must be in the format 'lat,lon'."
            )

        self.coordinates_dataset = [
            (float(c.split(",")[0]), float(c.split(",")[1])) for c in df["coordonnees"]
        ]

    def build_mis_instance_from_coordinates(
        self, antenna_range: float, antennas: set[int] = None
    ) -> MISInstance:
        """
        Build a Maximum Independent Set (MIS) instance from the loaded coordinates.
        The function creates a graph where nodes represent antennas and edges represent
        connections between antennas that are within the specified range.
        Args:
            antenna_range (float): The maximum distance between antennas to consider them connected.
            antennas (set[int], optional): A set of indices representing the antennas to include in the graph.
                                           If None, all antennas in the dataset are included.
        Returns:
            MISInstance: An instance of the Maximum Independent Set problem represented as a graph.
        """
        if self.coordinates_dataset is None:
            raise ValueError(
                "Coordinates dataset is not loaded. Please load the dataset using load_from_csv_coordinates method."
            )

        if antennas is None:
            antennas = set(range(len(self.coordinates_dataset)))

        graph = nx.Graph()

        for i in antennas:
            graph.add_node(i)
            for j in antennas:
                if (
                    i < j
                    and self.distance_from_coordinates(
                        self.coordinates_dataset[i], self.coordinates_dataset[j]
                    )
                    <= antenna_range
                ):
                    graph.add_edge(i, j)

        return MISInstance(graph)