class documentation

Undocumented

Method __init__ Undocumented
Method build_attention_mask Undocumented
Method encode_image Undocumented
Method encode_text Undocumented
Method forward Undocumented
Method initialize_parameters Undocumented
Instance Variable context_length Undocumented
Instance Variable ln_final Undocumented
Instance Variable logit_scale Undocumented
Instance Variable positional_embedding Undocumented
Instance Variable text_projection Undocumented
Instance Variable token_embedding Undocumented
Instance Variable transformer Undocumented
Instance Variable visual Undocumented
Instance Variable vocab_size Undocumented
Property dtype Undocumented
def __init__(self, embed_dim: int, image_resolution: int, vision_layers: tuple[int, int, int, int] | int, vision_width: int, vision_patch_size: int, context_length: int, vocab_size: int, transformer_width: int, transformer_heads: int, transformer_layers: int): (source)

Undocumented

def build_attention_mask(self): (source)

Undocumented

def encode_image(self, image): (source)

Undocumented

def encode_text(self, text): (source)

Undocumented

def forward(self, image, text): (source)

Undocumented

def initialize_parameters(self): (source)

Undocumented

context_length = (source)

Undocumented

ln_final = (source)

Undocumented

logit_scale = (source)

Undocumented

positional_embedding = (source)

Undocumented

text_projection = (source)

Undocumented

token_embedding = (source)

Undocumented

transformer = (source)

Undocumented

visual = (source)

Undocumented

vocab_size = (source)

Undocumented

@property
def dtype(self): (source)

Undocumented