Create datasets
We create datasets of different sizes and save them in the data/raw_datasets folder.
In [ ]:
import os
import torch
from pathlib import Path

from qubosolver import QUBODataset
from qubosolver.saveload import save_qubo_dataset, load_qubo_dataset

output_directory = Path(str(os.path.abspath("01-dataset-generation-and-loading")).replace("docs/tutorial", "qubosolver_logs/tutorial"))

dataset_sizes = range(5, 101, 5)
instances_per_size = 10
densities_list = [0.6]
coefficient_bounds = (-100.0, 100.0)
seed = 42


def create_and_save_dataset(
    output_dir: str,
    dataset_name: str,
    size: int,
    num_instances: int,
    densities: list[float],
    coefficient_bounds: tuple[float, float],
    device: str = "cpu",
    dtype: torch.dtype = torch.float32,
    seed: int | None = None,
):
    """
    Create a QUBODataset with the given coefficient bounds and save it to disk.

    Args:
        output_dir (str): Output directory.
        dataset_name (str): Name of the file to generate.
        size (int): Dimension of the QUBO (size x size).
        num_instances (int): Number of instances for each density.
        densities (list[float]): List of densities (ratio of non-null elements).
        coefficient_bounds (tuple[float, float]): Interval (min, max) of the non-null values.
        device (str): Device ("cpu" or "cuda").
        dtype (torch.dtype): Tensor dtype.
        seed (int | None): Seed for reproducibility.
    """
    # Generate the dataset
    dataset = QUBODataset.from_random(
        n_matrices=num_instances,
        matrix_dim=size,
        densities=densities,
        coefficient_bounds=coefficient_bounds,
        device=device,
        dtype=dtype,
        seed=seed,
    )
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, dataset_name)
    save_qubo_dataset(dataset, file_path)
    print(f"Dataset saved to {file_path}")


## To generate the datasets, uncomment the lines below
# if __name__ == "__main__":
#     for size in dataset_sizes:
#         fname = f"raw_qubo_dataset_size_{size}.pt"
#         create_and_save_dataset(
#             output_dir=output_directory,
#             dataset_name=fname,
#             size=size,
#             num_instances=instances_per_size,
#             densities=densities_list,
#             coefficient_bounds=coefficient_bounds,
#             device="cpu",
#             dtype=torch.float32,
#             seed=seed,
#         )
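If you prefer not to run the full loop, you could generate just the size-5 dataset that the later cells rely on. This is a minimal sketch reusing the helper and the configuration defined above; the file name simply follows the pattern expected by the loading code.

In [ ]:
# Generate and save only the size-5 dataset used in the following sections.
create_and_save_dataset(
    output_dir=output_directory,
    dataset_name="raw_qubo_dataset_size_5.pt",
    size=5,
    num_instances=instances_per_size,
    densities=densities_list,
    coefficient_bounds=coefficient_bounds,
    device="cpu",
    dtype=torch.float32,
    seed=seed,
)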
Load datasets
Here we load the datasets and show how they can be used.
In [ ]:
import re


def load_datasets_by_size(directory: str):
    """
    Load datasets from a directory by extracting the size from the filenames.

    Args:
        directory (str): Path to the directory containing the dataset files.

    Returns:
        dict[int, QUBODataset]: A dictionary mapping each size to its loaded dataset.
    """
    # Regular expression matching filenames like "raw_qubo_dataset_size_{size}.pt"
    pattern = r"raw_qubo_dataset_size_(\d+)\.pt"
    datasets_by_size = {}
    os.makedirs(directory, exist_ok=True)
    for filename in os.listdir(directory):
        match = re.match(pattern, filename)
        if match:
            size = int(match.group(1))
            file_path = os.path.join(directory, filename)
            dataset = load_qubo_dataset(file_path)
            datasets_by_size[size] = dataset
            print(f"Loaded dataset with size {size} from {file_path}")
    return datasets_by_size
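A single file can also be loaded directly with load_qubo_dataset. The sketch below assumes the size-5 file from the previous section has already been generated.

In [ ]:
# Load one saved dataset file directly, without scanning the whole directory.
single_dataset = load_qubo_dataset(os.path.join(output_directory, "raw_qubo_dataset_size_5.pt"))
print(single_dataset)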
datasets is a dictionary holding the QUBO datasets for all the different sizes:
- datasets[5] gives the QUBODataset of size 5, with its different densities and disparities.
- Each QUBO dataset also has a solution component, which is None for all the datasets in the raw_datasets folder.
In [ ]:
datasets = load_datasets_by_size(output_directory)
In [ ]:
datasets
Dataset of a specific size
In [ ]:
size = 5
data_size_5 = datasets[size]
qubo_coefficients, qubo_solution = data_size_5[9]  # the 10th instance of size 5
print(f"Coefficients : {qubo_coefficients}")
print(f"Solution : {qubo_solution}")  # None because raw data
In [ ]:
size = 5
for coefficients, solution in datasets[size]:
    print(f"Size of the qubo {size}")
    print(f"Coefficients : {coefficients}")
    print(f"Solution : {solution}")  # None because raw data
    break
Iterate through all sizes
In [ ]:
for size, dataset in datasets.items():
    for coefficients, solution in dataset:
        print(f"Size of the qubo {size}")
        print(f"Coefficients : {coefficients}")
        print(f"Solution : {solution}")  # None because raw data
        break
    break
Density
In [ ]:
from qubosolver.utils import calculate_density

for size, dataset in datasets.items():
    for coefficients, solution in dataset:
        print(f"Size of the qubo {size}")
        print(f"Coefficients : {coefficients}")
        print(f"Solution : {solution}")  # None because raw data
        print(f"Density : {calculate_density(coefficients, size)}")
        break
    break
Creating a QUBOInstance
In [ ]:
from qubosolver import QUBOInstance
In [ ]:
for size, dataset in datasets.items():
    for coefficients, solution in dataset:
        print(f"Size of the qubo {size}")
        print(f"Coefficients : {coefficients}")
        print(f"Solution : {solution}")  # None because raw data
        qubo_inst = QUBOInstance(coefficients)
        print(f"QUBO Instance : {qubo_inst}")
        break
    break
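A QUBOInstance does not have to come from a dataset. As a small sketch (assuming, as above, that the constructor accepts any square coefficient tensor), one can be built from a hand-written matrix:

In [ ]:
# Build a QUBOInstance from a hand-written 3x3 coefficient matrix (illustrative values).
manual_coefficients = torch.tensor(
    [[ 1.0, -2.0,  0.0],
     [-2.0,  3.0,  1.5],
     [ 0.0,  1.5, -1.0]]
)
manual_instance = QUBOInstance(manual_coefficients)
print(manual_instance)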
Add a solution
We can add solutions using the QUBOSolution class. The solution is also stored on the QUBOInstance, but it can be defined outside of one as well.
- We can then create a new dataset from these solutions.
In [ ]:
from qubosolver.data import QUBOSolution
In [ ]:
size = 5
coeffs = None
solutions = []
for i in range(10):
    coefficients, _ = datasets[size][i]
    # print(f"Size of the qubo {size}")
    # print(f"Coefficients : {coefficients}")
    # Stack the coefficient matrices along a third dimension: (size, size, num_instances).
    if coeffs is None:
        coeffs = coefficients.unsqueeze(2)
    else:
        coeffs = torch.cat((coeffs, coefficients.unsqueeze(2)), dim=2)
    qubo_inst = QUBOInstance(coefficients)
    print(f"QUBO Instance : {qubo_inst}")
    # Do your processing here.
    # Normally the solution would be stored on the QUBOInstance
    # and could be extracted as:
    #   qubo_solution = qubo_inst.solution
    # Here we define a placeholder solution by hand, since no solver has been set up yet.
    qubo_sol = QUBOSolution(
        bitstrings=torch.Tensor([[1, 0, 1, 1, 0]]),
        costs=torch.Tensor([0.5]),
    )
    solutions.append(qubo_sol)
    print("Updated Solution : ", qubo_sol)
In [ ]:
new_dataset = QUBODataset(coefficients=coeffs, solutions=solutions)
In [ ]:
new_dataset[0]
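The enriched dataset can then be written back to disk with the same save helper used earlier. This is a sketch; the file name is illustrative.

In [ ]:
# Save the dataset that now carries solutions alongside the coefficients.
os.makedirs(output_directory, exist_ok=True)
save_qubo_dataset(new_dataset, os.path.join(output_directory, "qubo_dataset_size_5_with_solutions.pt"))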