Models#
Streamer Model#
from streamer.models import CNNEncoder, CNNDecoder
from streamer.models import StreamerModelArguments, StreamerModel
model_args = StreamerModelArguments(max_layers=3,
feature_dim=1024,
evolve_every=50000,
buffer_size=20,
force_fixed_buffer=True,
lr=0.0001,
init_ckpt='',
snippet_size=0.5,
demarcation_mode='average',
window_size=50,
distance_mode='similarity',
force_base_dist=False)
model = StreamerModel(args=model_args, logger=None, encoder=CNNEncoder, decoder=CNNDecoder)
- class streamer.models.model.StreamerModelArguments(log_base_every: int = 1000, main: bool = True, max_layers: int = 3, feature_dim: int = 1024, evolve_every: int = 50000, buffer_size: int = 10, force_fixed_buffer: bool = False, loss_threshold: float = 0.25, lr: float = 0.0001, init_layers: int = 1, init_ckpt: str = '', ckpt_dir: str = '', snippet_size: float = 0.5, demarcation_mode: str = 'average', distance_mode: str = 'distance', force_base_dist: bool = True, window_size: int = 50, modifier_type: str = 'multiply', modifier: float = 1.0)[source]#
- log_base_every: int = 1000#
Tensorboard log of the base layer every ‘log_base_every’
- main: bool = True#
Main process/gpu or not
- max_layers: int = 3#
The maximum number of layers to stack
- feature_dim: int = 1024#
Feature dimension of the model embeddings
- evolve_every: int = 50000#
Create/stack a new layer every ‘evolve_every’
- buffer_size: int = 10#
Maximum input buffer size to be used
- force_fixed_buffer: bool = False#
Force the buffer to be fixed (not replacing inputs) by triggering a boundary when buffer is full
- loss_threshold: float = 0.25#
Loss threshold value. Not used in average demarcation mode
- lr: float = 0.0001#
Learning rate to be used in all modules
- init_layers: int = 1#
How many layers to initialize before training
- init_ckpt: str = ''#
The path of the pretrained weights, if any
- ckpt_dir: str = ''#
The path to save weights
- snippet_size: float = 0.5#
Snippet size of input video (seconds/image). Typically 0.5 seconds per image
- demarcation_mode: str = 'average'#
Demarcation mode used to detect boundaries
- distance_mode: str = 'distance'#
Distance mode for loss calculation
- force_base_dist: bool = True#
Force the lowest layer to use MSE instead of Cosine Similarity
- window_size: int = 50#
Window size for average demarcation mode
- modifier_type: str = 'multiply'#
Modifier type to apply to average demarcation mode [‘multiply’, ‘add’]
- modifier: float = 1.0#
Modifier to apply to average demarcation mode
- class streamer.models.model.StreamerModel(args: StreamerModelArguments, logger=None, encoder=None, decoder=None)[source]#
The implementation of the STREAMER model for training. This class initializes the first layer(s) of streamer, saves/loads weights, etc.
- Parameters:
args (StreamerModelArguments) – The arguments passed to Streamer Model
logger (Logger) – The tensorboard logger class
encoder (torch.nn.Module) – The encoder model (e.g., CNNEncoder)
decoder (torch.nn.Module) – The decoder model (e.g., CNNDecoder)
- __initialize(ckpt='', count=1, encoder=None, decoder=None)#
Initializes the Streamer layer(s) with checkpoint if available.
- Parameters:
ckpt (str) – pretrained weights location
count (int) – How many layers to create
encoder (torch.nn.Module) – The encoder model (e.g., CNNEncoder)
decoder (torch.nn.Module) – The decoder model (e.g., CNNDecoder)
- forward(x)[source]#
Forward propagation function that calls the forward() function of the first StreamerLayer.
- Parameters:
x (torch.Tensor) – the input image [1, 3, H, W]
- getReps(layer_num)[source]#
Aggregates the representations from all the layers.
- Parameters:
layer_num (int) – the index of the calling layer
- Returns:
(torch.Tensor): concatenated representations from all layers [L, feature_dim]
- reset_model()[source]#
Resets the whole streamer model for a new video. Calls the recursive function reset_layer() on the StreamerLayer class.
- optimize_model()[source]#
Optimizes the whole streamer model (gradient step). Calls the recursive function optimize_layer() on the StreamerLayer class.
Streamer Inference#
from streamer.models.inference_model import InferenceModel
model = InferenceModel(checkpoint='to/checkpoint/path/')
result = model(filename='to/video/file/path')
- class streamer.models.inference_model.InferenceLoader(modality, filename, snippet_size)[source]#
Dataloader used by InferenceModel to generate the images from the video
- Parameters:
modality (str) – only video is supported now
filename (str) – Video filename to load
snippet_size (float) – How many seconds per frame (typically 0.5)
- class streamer.models.inference_model.InferenceModel(checkpoint, logger=None)[source]#
Inference model used to run inference on a video using pretrained weights
- Parameters:
checkpoint (dict) – the loaded checkpoint of pretrained model
logger (Logger) – tensorboard logger, if needed
- get_model()[source]#
Creates a streamer model and initializes it with ckpt.
- Returns:
(StreamerModel): The initialized StreamerModel, moved to the CUDA device