Skip to content

Datasets

Perceptrain can work with standard PyTorch Datasets, but it also offers some built-ins.

  • InfiniteTensorDataset: utility dataset to infinitely sample a sequence of input tensors.
  • R3Dataset: dynamic dataset that implements the R3 (retain, release, resample) technique introduced in this article, also known as evolutionary sampling. Even though R3 was originally developed for physics-informed neural networks (PINNs), R3Dataset can be used for any dataset generated by a probability distribution and whose samples can be scored via some user-defined fitness function.

Dataloaders

When using Perceptrain, you can supply classical data to a quantum machine learning algorithm by using a standard PyTorch DataLoader instance. Perceptrain also provides the DictDataLoader convenience class, which lets you build a dictionary of DataLoader instances and easily iterate over them.

import torch
from torch.utils.data import DataLoader, TensorDataset
from perceptrain import DictDataLoader, to_dataloader


def dataloader(data_size: int = 25, batch_size: int = 5, infinite: bool = False) -> DataLoader:
    """Build a loader for ``sin(x)`` sampled on an evenly spaced grid over [0, 1].

    Args:
        data_size: Number of grid points, i.e. samples in the dataset.
        batch_size: Forwarded to ``to_dataloader`` as the batch size.
        infinite: Forwarded to ``to_dataloader`` unchanged.

    Returns:
        Whatever ``to_dataloader`` builds from the ``(x, sin(x))`` pair.
    """
    # Reshape the 1-D grid into a column so every sample is a length-1 feature row.
    inputs = torch.linspace(0, 1, data_size).reshape(-1, 1)
    targets = torch.sin(inputs)
    return to_dataloader(inputs, targets, batch_size=batch_size, infinite=infinite)


def dictdataloader(data_size: int = 25, batch_size: int = 5) -> DictDataLoader:
    """Build a ``DictDataLoader`` holding two loaders, keyed ``"y1"`` and ``"y2"``.

    Each entry is created with ``infinite=True`` from its own uniformly random
    inputs ``x`` of shape ``(data_size, 1)`` and targets ``sin(x)``.

    Args:
        data_size: Number of random samples drawn for each loader.
        batch_size: Forwarded to ``to_dataloader`` as the batch size.

    Returns:
        A ``DictDataLoader`` wrapping the two per-key loaders.
    """

    def _random_sine_loader() -> DataLoader:
        # Draw fresh inputs on every call so each key gets independent data.
        inputs = torch.rand(data_size, 1)
        return to_dataloader(inputs, torch.sin(inputs), batch_size=batch_size, infinite=True)

    return DictDataLoader({key: _random_sine_loader() for key in ("y1", "y2")})


# A standard (finite) DataLoader can be consumed directly with a for loop.
for x, y in dataloader(data_size=6, batch_size=2):
    print(f"Standard {x = }")

# An infinite loader keeps sampling indefinitely, so draw a fixed number of
# batches with next() instead of looping over it; n_epochs is that number.
n_epochs = 5
dl = iter(dataloader(data_size=6, batch_size=2, infinite=True))
for _ in range(n_epochs):
    x, y = next(dl)
    print(f"Infinite {x = }")

# A DictDataLoader yields one dictionary per step; take the first one.
ddl = dictdataloader()
data = next(iter(ddl))
print(f"{data = }")
Standard x = tensor([[0.0000],
        [0.2000]])
Standard x = tensor([[0.4000],
        [0.6000]])
Standard x = tensor([[0.8000],
        [1.0000]])
Infinite x = tensor([[0.0000],
        [0.2000]])
Infinite x = tensor([[0.8000],
        [1.0000]])
Infinite x = tensor([[0.4000],
        [0.6000]])
Infinite x = tensor([[0.0000],
        [0.2000]])
Infinite x = tensor([[0.8000],
        [1.0000]])
data = {'y1': [tensor([[0.6830],
        [0.2882],
        [0.3586],
        [0.3201],
        [0.8228]]), tensor([[0.6311],
        [0.2842],
        [0.3510],
        [0.3147],
        [0.7330]])], 'y2': [tensor([[0.9122],
        [0.8080],
        [0.9102],
        [0.7443],
        [0.7366]]), tensor([[0.7909],
        [0.7229],
        [0.7896],
        [0.6774],
        [0.6718]])]}

Note: When infinite=True, the dataloader iterator returns randomly sampled batches from the dataset instead of iterating over it in order.