Create datasets
We create datasets of different sizes and save them in the data/raw_datasets folder.
In [ ]:
import os
import torch
from pathlib import Path

from qubosolver import QUBODataset
from qubosolver.saveload import save_qubo_dataset, load_qubo_dataset

output_directory = Path(str(os.path.abspath("01-dataset-generation-and-loading")).replace("docs/tutorial", "qubosolver_logs/tutorial"))

dataset_sizes = range(5, 101, 5)
instances_per_size = 10
densities_list = [0.6]
coefficient_bounds = (-100.0, 100.0)
seed = 42


def create_and_save_dataset(
    output_dir: str,
    dataset_name: str,
    size: int,
    num_instances: int,
    densities: list[float],
    coefficient_bounds: tuple[float, float],
    device: str = "cpu",
    dtype: torch.dtype = torch.float32,
    seed: int | None = None,
):
    """
    Create a QUBODataset with the given coefficient bounds and save it to disk.

    Args:
        output_dir (str): Output directory.
        dataset_name (str): Name of the file to generate.
        size (int): Dimension of the QUBO (size x size).
        num_instances (int): Number of instances for each density.
        densities (list[float]): List of densities (ratio of non-null elements).
        coefficient_bounds (tuple[float, float]): Interval (min, max) of the non-null values.
        device (str): Device ("cpu" or "cuda").
        dtype (torch.dtype): Tensor dtype.
        seed (int | None): Seed for reproducibility.
    """
    # Generate the dataset
    dataset = QUBODataset.from_random(
        n_matrices=num_instances,
        matrix_dim=size,
        densities=densities,
        coefficient_bounds=coefficient_bounds,
        device=device,
        dtype=dtype,
        seed=seed,
    )
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, dataset_name)
    save_qubo_dataset(dataset, file_path)
    print(f"Dataset saved to {file_path}")


## To generate the datasets, uncomment the lines below
# if __name__ == "__main__":
#     for size in dataset_sizes:
#         fname = f"raw_qubo_dataset_size_{size}.pt"
#         create_and_save_dataset(
#             output_dir=output_directory,
#             dataset_name=fname,
#             size=size,
#             num_instances=instances_per_size,
#             densities=densities_list,
#             coefficient_bounds=coefficient_bounds,
#             device="cpu",
#             dtype=torch.float32,
#             seed=seed,
#         )
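If you prefer not to run the full loop, you could generate just the size-5 dataset that the later cells rely on. This is a minimal sketch reusing the helper and the configuration defined above; the file name simply follows the pattern expected by the loading code.

In [ ]:
# Generate and save only the size-5 dataset used in the following sections.
create_and_save_dataset(
    output_dir=output_directory,
    dataset_name="raw_qubo_dataset_size_5.pt",
    size=5,
    num_instances=instances_per_size,
    densities=densities_list,
    coefficient_bounds=coefficient_bounds,
    device="cpu",
    dtype=torch.float32,
    seed=seed,
)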
Load datasets
Here we load the datasets and show how they can be used.
In [ ]:
import re


def load_datasets_by_size(directory: str):
    """
    Load datasets from a directory by extracting the size from the filenames.

    Args:
        directory (str): Path to the directory containing the dataset files.

    Returns:
        dict[int, QUBODataset]: A dictionary mapping each size to its loaded dataset.
    """
    # Regular expression matching filenames like "raw_qubo_dataset_size_{size}.pt"
    pattern = r"raw_qubo_dataset_size_(\d+)\.pt"
    datasets_by_size = {}
    os.makedirs(directory, exist_ok=True)
    for filename in os.listdir(directory):
        match = re.match(pattern, filename)
        if match:
            size = int(match.group(1))
            file_path = os.path.join(directory, filename)
            dataset = load_qubo_dataset(file_path)
            datasets_by_size[size] = dataset
            print(f"Loaded dataset with size {size} from {file_path}")
    return datasets_by_size
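A single file can also be loaded directly with load_qubo_dataset. The sketch below assumes the size-5 file from the previous section has already been generated.

In [ ]:
# Load one saved dataset file directly, without scanning the whole directory.
single_dataset = load_qubo_dataset(os.path.join(output_directory, "raw_qubo_dataset_size_5.pt"))
print(single_dataset)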
datasets is a dictionary holding the QUBO datasets for all the different sizes:
- datasets[5] gives the QUBODataset of size 5, with its different densities and disparities.
- Each QUBO dataset also has a solution component, which is None for all the datasets in the raw_datasets folder.
In [ ]:
datasets = load_datasets_by_size(output_directory)
In [ ]:
datasets
Dataset of a specific size
In [ ]:
size = 5
data_size_5 = datasets[size]
qubo_coefficients, qubo_solution = data_size_5[9]  # the 10th instance of size 5
print(f"Coefficients : {qubo_coefficients}")
print(f"Solution : {qubo_solution}")  # None because raw data
In [ ]:
size = 5
for coefficients, solution in datasets[size]:
    print(f"Size of the qubo {size}")
    print(f"Coefficients : {coefficients}")
    print(f"Solution : {solution}")  # None because raw data
    break
Iterate through all sizes
In [ ]:
for size, dataset in datasets.items():
    for coefficients, solution in dataset:
        print(f"Size of the qubo {size}")
        print(f"Coefficients : {coefficients}")
        print(f"Solution : {solution}")  # None because raw data
        break
    break
Density
In [ ]:
from qubosolver.utils import calculate_density

for size, dataset in datasets.items():
    for coefficients, solution in dataset:
        print(f"Size of the qubo {size}")
        print(f"Coefficients : {coefficients}")
        print(f"Solution : {solution}")  # None because raw data
        print(f"Density : {calculate_density(coefficients, size)}")
        break
    break
Creating a QUBOInstance
In [ ]:
from qubosolver import QUBOInstance
In [ ]:
for size, dataset in datasets.items():
    for coefficients, solution in dataset:
        print(f"Size of the qubo {size}")
        print(f"Coefficients : {coefficients}")
        print(f"Solution : {solution}")  # None because raw data
        qubo_inst = QUBOInstance(coefficients)
        print(f"QUBO Instance : {qubo_inst}")
        break
    break
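A QUBOInstance does not have to come from a dataset. As a small sketch (assuming, as above, that the constructor accepts any square coefficient tensor), one can be built from a hand-written matrix:

In [ ]:
# Build a QUBOInstance from a hand-written 3x3 coefficient matrix (illustrative values).
manual_coefficients = torch.tensor(
    [[ 1.0, -2.0,  0.0],
     [-2.0,  3.0,  1.5],
     [ 0.0,  1.5, -1.0]]
)
manual_instance = QUBOInstance(manual_coefficients)
print(manual_instance)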
Add a solution
We can add solutions using the QUBOSolution class. The solution is also stored on the QUBOInstance, but it can be defined outside of one as well.
- We can then create a new dataset from these solutions.
In [ ]:
from qubosolver.data import QUBOSolution
In [ ]:
size = 5
coeffs = None
solutions = []
for i in range(10):
    coefficients, _ = datasets[size][i]
    # print(f"Size of the qubo {size}")
    # print(f"Coefficients : {coefficients}")
    # Stack the coefficient matrices along a third dimension: (size, size, num_instances).
    if coeffs is None:
        coeffs = coefficients.unsqueeze(2)
    else:
        coeffs = torch.cat((coeffs, coefficients.unsqueeze(2)), dim=2)
    qubo_inst = QUBOInstance(coefficients)
    print(f"QUBO Instance : {qubo_inst}")
    # Do your processing here.
    # Normally the solution would be stored on the QUBOInstance
    # and could be extracted as:
    #   qubo_solution = qubo_inst.solution
    # Here we define a placeholder solution by hand, since no solver has been set up yet.
    qubo_sol = QUBOSolution(
        bitstrings=torch.Tensor([[1, 0, 1, 1, 0]]),
        costs=torch.Tensor([0.5]),
    )
    solutions.append(qubo_sol)
    print("Updated Solution : ", qubo_sol)
In [ ]:
new_dataset = QUBODataset(coefficients=coeffs, solutions=solutions)
In [ ]:
new_dataset[0]
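The enriched dataset can then be written back to disk with the same save helper used earlier. This is a sketch; the file name is illustrative.

In [ ]:
# Save the dataset that now carries solutions alongside the coefficients.
os.makedirs(output_directory, exist_ok=True)
save_qubo_dataset(new_dataset, os.path.join(output_directory, "qubo_dataset_size_5_with_solutions.pt"))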