{
"models": [
{
"name": "Gemma-3n-E2B-it-int4",
"modelId": "google/gemma-3n-E2B-it-litert-preview",
"modelFile": "gemma-3n-E2B-it-int4.task",
"description": "Preview version of [Gemma 3n E2B](https://ai.google.dev/gemma/docs/gemma-3n) ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference). The current checkpoint only supports text and vision input, with 4096 context length.",
"sizeInBytes": 3136226711,
"estimatedPeakMemoryInBytes": 5905580032,
"version": "20250520",
"llmSupportImage": true,
"defaultConfig": {
"topK": 64,
"topP": 0.95,
"temperature": 1.0,
"maxTokens": 4096,
"accelerators": "cpu,gpu"
},
"taskTypes": ["llm_chat", "llm_prompt_lab", "llm_ask_image"]
},
{
"name": "Gemma-3n-E4B-it-int4",
"modelId": "google/gemma-3n-E4B-it-litert-preview",
"modelFile": "gemma-3n-E4B-it-int4.task",
"description": "Preview version of [Gemma 3n E4B](https://ai.google.dev/gemma/docs/gemma-3n) ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference). The current checkpoint only supports text and vision input, with 4096 context length.",
"sizeInBytes": 4405655031,
"estimatedPeakMemoryInBytes": 6979321856,
"version": "20250520",
"llmSupportImage": true,
"defaultConfig": {
"topK": 64,
"topP": 0.95,
"temperature": 1.0,
"maxTokens": 4096,
"accelerators": "cpu,gpu"
},
"taskTypes": ["llm_chat", "llm_prompt_lab", "llm_ask_image"]
},
{
"name": "Gemma3-1B-IT q4",
"modelId": "litert-community/Gemma3-1B-IT",
"modelFile": "Gemma3-1B-IT_multi-prefill-seq_q4_ekv2048.task",
"description": "A variant of [google/Gemma-3-1B-IT](https://huggingface.co/google/Gemma-3-1B-IT) with 4-bit quantization ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference)",
"sizeInBytes": 554661246,
"estimatedPeakMemoryInBytes": 2147483648,
"version": "20250514",
"defaultConfig": {
"topK": 64,
"topP": 0.95,
"temperature": 1.0,
"maxTokens": 1024,
"accelerators": "gpu,cpu"
},
"taskTypes": ["llm_chat", "llm_prompt_lab"]
},
{
"name": "Qwen2.5-1.5B-Instruct q8",
"modelId": "litert-community/Qwen2.5-1.5B-Instruct",
"modelFile": "Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task",
"description": "A variant of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) with 8-bit quantization ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference)",
"sizeInBytes": 1625493432,
"estimatedPeakMemoryInBytes": 2684354560,
"version": "20250514",
"defaultConfig": {
"topK": 40,
"topP": 0.95,
"temperature": 1.0,
"maxTokens": 1024,
"accelerators": "cpu"
},
"taskTypes": ["llm_chat", "llm_prompt_lab"]
}
]
}