# Spaces: Sleeping / Sleeping
# (Hugging Face Spaces status banner captured along with the source; not code.)
from typing import Optional

from pydantic import BaseModel, Field
class GgufParser(BaseModel):
    """Top-level result of parsing a GGUF model file.

    Aggregates the four report sections produced by the parser. Forward
    references (string annotations) are used because the section models
    are defined later in this module.
    """

    metadata: "Metadata"          # general file metadata (type, size, quantization, ...)
    architecture: "Architecture"  # model architecture hyper-parameters
    tokenizer: "Tokenizer"        # tokenizer configuration and special-token IDs
    estimate: "Estimate"          # resource-usage estimate section
class Metadata(BaseModel):
    """General metadata section of a GGUF file.

    Field aliases match the camelCase keys of the parser's JSON output.
    """

    # Trailing underscore because "type" shadows the builtin; populated via alias.
    type_: str = Field(alias="type")
    architecture: str
    quantization_version: int = Field(alias="quantizationVersion")
    alignment: int
    name: str
    file_type: int = Field(alias="fileType")
    little_endian: bool = Field(alias="littleEndian")
    file_size: int = Field(alias="fileSize")
    # NOTE(review): distinction between fileSize, size and parameters is not
    # visible here — presumably bytes on disk, tensor-data bytes, and parameter
    # count respectively; confirm against the parser's output schema.
    size: int
    parameters: int
    bits_per_weight: float = Field(alias="bitsPerWeight")
class Architecture(BaseModel):
    """Model-architecture section of a GGUF file.

    Field aliases match the camelCase keys of the parser's JSON output.
    The two RoPE fields are optional: not every architecture uses rotary
    position embeddings, so the parser may omit them. They were previously
    annotated as plain ``int`` while defaulting to ``None`` — a type/default
    mismatch — and are now ``Optional[int]``.
    """

    # Trailing underscore because "type" shadows the builtin; populated via alias.
    type_: str = Field(alias="type")
    architecture: str
    maximum_context_length: int = Field(alias="maximumContextLength")
    embedding_length: int = Field(alias="embeddingLength")
    block_count: int = Field(alias="blockCount")
    feed_forward_length: int = Field(alias="feedForwardLength")
    attention_head_count: int = Field(alias="attentionHeadCount")
    attention_head_count_kv: int = Field(alias="attentionHeadCountKV")
    # Alias says "RMSEpsilon" (RMS-norm epsilon); the field name's "rmse" is a
    # slight misnomer kept for backward compatibility with existing callers.
    attention_layer_norm_rmse_epsilon: float = Field(
        alias="attentionLayerNormRMSEpsilon"
    )
    attention_key_length: int = Field(alias="attentionKeyLength")
    attention_value_length: int = Field(alias="attentionValueLength")
    attention_causal: bool = Field(alias="attentionCausal")
    # Optional: absent for architectures without rotary position embeddings.
    rope_dimension_count: Optional[int] = Field(None, alias="ropeDimensionCount")
    rope_frequency_base: Optional[int] = Field(None, alias="ropeFrequencyBase")
    vocabulary_length: int = Field(alias="vocabularyLength")
    embedding_gqa: int = Field(alias="embeddingGQA")
    embedding_key_gqa: int = Field(alias="embeddingKeyGQA")
    embedding_value_gqa: int = Field(alias="embeddingValueGQA")
class Tokenizer(BaseModel):
    """Tokenizer section of a GGUF file.

    Field aliases match the camelCase keys of the parser's JSON output.
    The *_token_id fields carry the special-token IDs reported by the file.
    """

    model: str  # tokenizer model identifier (e.g. the tokenization scheme name)
    tokens_length: int = Field(alias="tokensLength")
    merges_length: int = Field(alias="mergesLength")
    added_token_length: int = Field(alias="addedTokenLength")
    bos_token_id: int = Field(alias="bosTokenID")
    eos_token_id: int = Field(alias="eosTokenID")
    eot_token_id: int = Field(alias="eotTokenID")
    eom_token_id: int = Field(alias="eomTokenID")
    unknown_token_id: int = Field(alias="unknownTokenID")
    separator_token_id: int = Field(alias="separatorTokenID")
    padding_token_id: int = Field(alias="paddingTokenID")
    # NOTE(review): presumably sizes in bytes, vs. the *Length entry counts
    # above — confirm against the parser's output schema.
    tokens_size: int = Field(alias="tokensSize")
    merges_size: int = Field(alias="mergesSize")
class Ram(BaseModel):
    """Per-device memory figures within an estimate item.

    Used both for system RAM and (as list elements) for per-GPU VRAM in
    ``Item.vrams``. Field aliases match the camelCase keys of the parser's
    JSON output.
    """

    handle_layers: int = Field(alias="handleLayers")
    handle_last_layer: int = Field(alias="handleLastLayer")
    handle_output_layer: bool = Field(alias="handleOutputLayer")
    remote: bool
    position: int
    # NOTE(review): uma/nonuma look like byte figures for unified vs.
    # non-unified memory architectures — confirm against the parser docs.
    uma: int
    nonuma: int
class Item(BaseModel):
    """One offload scenario within the estimate section.

    Field aliases match the camelCase keys of the parser's JSON output.
    ``maximum_tokens_per_second`` was previously annotated as plain ``float``
    while defaulting to ``None`` — a type/default mismatch — and is now
    ``Optional[float]``, since the parser only reports it in some scenarios.
    """

    offload_layers: int = Field(alias="offloadLayers")
    full_offloaded: bool = Field(alias="fullOffloaded")
    # Optional: only present when the parser can estimate throughput.
    maximum_tokens_per_second: Optional[float] = Field(
        None, alias="maximumTokensPerSecond"
    )
    ram: "Ram"           # system-RAM figures for this scenario
    vrams: list["Ram"]   # one entry per GPU/device
class Estimate(BaseModel):
    """Resource-usage estimate section of a GGUF parse result.

    Field aliases match the camelCase keys of the parser's JSON output.
    The original class body declared the entire run of fields from ``type_``
    through ``physical_batch_size`` twice, verbatim; the redundant second
    copy (which merely re-bound identical declarations) has been removed.
    """

    items: list["Item"]  # one entry per offload scenario
    # Trailing underscore because "type" shadows the builtin; populated via alias.
    type_: str = Field(alias="type")
    architecture: str
    context_size: int = Field(alias="contextSize")
    flash_attention: bool = Field(alias="flashAttention")
    no_mmap: bool = Field(alias="noMMap")
    embedding_only: bool = Field(alias="embeddingOnly")
    reranking: bool
    distributable: bool
    logical_batch_size: int = Field(alias="logicalBatchSize")
    physical_batch_size: int = Field(alias="physicalBatchSize")