Upload mobile/README.md with huggingface_hub
Browse files- mobile/README.md +93 -0
mobile/README.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FunctionGemma Mobile Models
|
| 2 |
+
|
| 3 |
+
## Available Formats
|
| 4 |
+
|
| 5 |
+
0 mobile format(s) available:
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
## Usage Examples
|
| 9 |
+
|
| 10 |
+
### PyTorch Mobile (Android)
|
| 11 |
+
|
| 12 |
+
```java
|
| 13 |
+
// Load the model
|
| 14 |
+
Module module = Module.load(assetFilePath(this, "functiongemma_mobile.pt"));
|
| 15 |
+
|
| 16 |
+
// Prepare input
|
| 17 |
+
long[] inputIds = new long[128];
|
| 18 |
+
// Fill with tokenized text
|
| 19 |
+
|
| 20 |
+
// Create tensor
|
| 21 |
+
Tensor inputTensor = Tensor.fromBlob(inputIds, new long[]{1, 128});
|
| 22 |
+
|
| 23 |
+
// Run inference
|
| 24 |
+
IValue output = module.forward(IValue.from(inputTensor));
|
| 25 |
+
Tensor outputTensor = output.toTensor();
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
### PyTorch Mobile (iOS)
|
| 29 |
+
|
| 30 |
+
```swift
|
| 31 |
+
// Load model
|
| 32 |
+
guard let filePath = Bundle.main.path(forResource: "functiongemma_mobile", ofType: "pt") else {
|
| 33 |
+
return
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
let module = try TorchModule(fileAtPath: filePath)
|
| 37 |
+
|
| 38 |
+
// Prepare input
|
| 39 |
+
var inputIds: [Int64] = Array(repeating: 0, count: 128)
|
| 40 |
+
// Fill with tokenized text
|
| 41 |
+
|
| 42 |
+
// Create tensor
|
| 43 |
+
let inputTensor = try Tensor(shape: [1, 128], data: inputIds)
|
| 44 |
+
|
| 45 |
+
// Run inference
|
| 46 |
+
let outputTensor = try module.forward([inputTensor])
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
### ONNX Runtime (Cross-platform)
|
| 50 |
+
|
| 51 |
+
```python
|
| 52 |
+
import numpy as np
import onnxruntime as ort
|
| 53 |
+
|
| 54 |
+
# Load model
|
| 55 |
+
session = ort.InferenceSession("functiongemma.onnx")
|
| 56 |
+
|
| 57 |
+
# Prepare input
|
| 58 |
+
input_ids = np.array([[...]], dtype=np.int64)
|
| 59 |
+
|
| 60 |
+
# Run inference
|
| 61 |
+
outputs = session.run(None, {"input_ids": input_ids})
|
| 62 |
+
logits = outputs[0]
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
## Model Details
|
| 66 |
+
|
| 67 |
+
- **Base Model**: {mobile_info['base_model']}
|
| 68 |
+
- **Vocab Size**: {mobile_info['vocab_size']:,}
|
| 69 |
+
- **Max Sequence**: {mobile_info['max_seq_length']} tokens
|
| 70 |
+
- **Recommended**: {mobile_info['recommended_seq_length']} tokens (mobile)
|
| 71 |
+
- **Fine-tuned on**: {mobile_info['fine_tuned_on']}
|
| 72 |
+
|
| 73 |
+
## Performance
|
| 74 |
+
|
| 75 |
+
- **Inference Time**: 50-300ms on mobile devices
|
| 76 |
+
- **Memory Usage**: 300-800 MB RAM
|
| 77 |
+
- **Quantized Version**: 2-4x faster, ~75% smaller
|
| 78 |
+
|
| 79 |
+
## Requirements
|
| 80 |
+
|
| 81 |
+
### PyTorch Mobile
|
| 82 |
+
- Android: Min SDK 21, PyTorch Mobile library
|
| 83 |
+
- iOS: Min iOS 12.0, LibTorch-Lite
|
| 84 |
+
|
| 85 |
+
### ONNX Runtime
|
| 86 |
+
- ONNX Runtime Mobile
|
| 87 |
+
- Android/iOS/Web/Desktop support
|
| 88 |
+
|
| 89 |
+
## Notes
|
| 90 |
+
|
| 91 |
+
- Use quantized version for better mobile performance
|
| 92 |
+
- Recommended sequence length: 128 tokens
|
| 93 |
+
- Batch size: 1 (mobile optimization)
|