Datasets#

Ego4D dataset#

from torch.utils.data import DataLoader
from streamer.datasets.ego4d_dataset import Ego4dDatasetArguments, Ego4dDataset

ego4d_args = Ego4dDatasetArguments(world_size=8,
                                   global_rank=1,
                                   dataset='ego4d',
                                   frame_size=[128, 128],
                                   percentage=25,
                                   split='train')
ego4d_dataset = Ego4dDataset(ego4d_args)
loader = DataLoader(dataset=ego4d_dataset, batch_size=1, num_workers=1, pin_memory=True)
class streamer.datasets.ego4d_dataset.Ego4dDatasetArguments(world_size: int, global_rank: int, dataset: str, frame_size: list, percentage: int, split: str = 'train')[source]#

Arguments for Ego4d dataset

world_size: int#

Number of GPUs across which the dataset is distributed

global_rank: int#

The rank of the running device

dataset: str#

The dataset name

frame_size: list#

The frame size to which the images will be resized

percentage: int#

Percentage of the dataset to run

split: str = 'train'#

The split of data to use. Choices: ['train', 'test']

class streamer.datasets.ego4d_dataset.Ego4dDataset(args: Ego4dDatasetArguments)[source]#

The Ego4D dataset, which iterates over the whole dataset and returns the frames one by one in a streaming fashion.

Parameters:

args (Ego4dDatasetArguments) – The parameters used for the Ego4d dataset

__getitem__(index)[source]#

Iterates over the dataset in a streaming fashion and retrieves one frame at a time

Parameters:

index (int) – The index of the item in the dataset to retrieve

Returns:

  • (torch.Tensor): the frame in tensor format

  • (List): Information about the frame [Video, duration, index, time, last_frame]


EPIC-KITCHENS dataset#

from torch.utils.data import DataLoader
from streamer.datasets.epic_dataset import EpicDatasetArguments, EpicDataset

epic_args = EpicDatasetArguments(world_size=8,
                                  global_rank=1,
                                  dataset='ego4d',
                                  frame_size=[128, 128],
                                  percentage=25,
                                  split='train')
epic_dataset = EpicDataset(epic_args)
loader = DataLoader(dataset=epic_dataset, batch_size=1, num_workers=1, pin_memory=True)
class streamer.datasets.epic_dataset.EpicDatasetArguments(world_size: int, global_rank: int, dataset: str, frame_size: list, percentage: int, split: str = 'train')[source]#

Arguments for Epic-Kitchens dataset

world_size: int#

Number of GPUs across which the dataset is distributed

global_rank: int#

The rank of the running device

dataset: str#

The dataset name

frame_size: list#

The frame size to which the images will be resized

percentage: int#

Percentage of the dataset to run

split: str = 'train'#

The split of data to use. Choices: ['train', 'test']

class streamer.datasets.epic_dataset.EpicDataset(args: EpicDatasetArguments)[source]#

The EPIC-KITCHENS dataset, which iterates over the whole dataset and returns the frames one by one in a streaming fashion.

Parameters:

args (EpicDatasetArguments) – The parameters used for the Epic-Kitchens dataset

__getitem__(index)[source]#

Iterates over the dataset in a streaming fashion and retrieves one frame at a time

Parameters:

index (int) – The index of the item in the dataset to retrieve

Returns:

  • (torch.Tensor): the frame in tensor format

  • (List): Information about the frame [Video, duration, index, time, last_frame]