Update README.md
Browse files
README.md
CHANGED
|
@@ -53,15 +53,67 @@ These side-by-side charts highlight mochaV2’s stronger performance in grammar,
|
|
| 53 |
Getting started is easy! Just load the model and tokenizer like this:
|
| 54 |
|
| 55 |
```python
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
tokenizer = AutoTokenizer.from_pretrained("path/to/mochaV2")
|
| 59 |
-
model = AutoModelForCausalLM.from_pretrained("path/to/mochaV2")
|
| 60 |
-
|
| 61 |
-
prompt = "The Eiffel Tower is"
|
| 62 |
-
inputs = tokenizer(prompt, return_tensors="pt")
|
| 63 |
-
outputs = model.generate(**inputs, max_new_tokens=50)
|
| 64 |
-
print(tokenizer.decode(outputs[0]))
|
| 65 |
```
|
| 66 |
|
| 67 |
---
|
|
|
|
| 53 |
Getting started is easy! Just load the model and tokenizer like this:
|
| 54 |
|
| 55 |
```python
|
| 56 |
+
import json
|
| 57 |
+
import torch
|
| 58 |
+
import gradio as gr
|
| 59 |
+
from transformers import AutoModelForCausalLM
|
| 60 |
+
from huggingface_hub import hf_hub_download
|
| 61 |
+
# Hugging Face Hub repository that hosts the model and its vocabulary files.
repo_id = "theguywhosucks/mochaV2"

# Fetch the two vocabulary tables from the Hub: stoi is used to look up the
# id of a character, itos is used to look up the token for an id.
itos_file = hf_hub_download(repo_id, "itos.json")
stoi_file = hf_hub_download(repo_id, "stoi.json")

# JSON is UTF-8 by specification; pass the encoding explicitly so the vocab
# is not decoded with a platform-dependent default (e.g. cp1252 on Windows).
with open(stoi_file, encoding="utf-8") as f:
    stoi = json.load(f)
with open(itos_file, encoding="utf-8") as f:
    itos = json.load(f)

# JSON object keys are always strings, so an id -> token mapping arrives as
# {"0": ..., "1": ...}; rebuild it as a list that can be indexed by int id.
if isinstance(itos, dict):
    itos = [itos[str(i)] for i in range(len(itos))]
|
| 73 |
+
# Tokenizer
|
| 74 |
+
class SimpleTokenizer:
    """Character-level tokenizer backed by two lookup tables.

    ``stoi`` maps a single character to its integer id and ``itos`` is the
    inverse table, indexable by id.  Characters and ids that are not in the
    tables fall back to the unknown token.
    """

    def __init__(self, stoi, itos):
        """Store the lookup tables and choose the unknown token.

        Args:
            stoi: Mapping from character to integer id.
            itos: Sequence of tokens indexed by id.

        Raises:
            IndexError: If "<unk>" is not in ``stoi`` and ``itos`` is an
                empty list.
        """
        self.stoi = stoi
        self.itos = itos
        # Prefer an explicit "<unk>" entry; otherwise reuse the token at id 0.
        self.unk_token = "<unk>" if "<unk>" in stoi else itos[0]

    def encode(self, text):
        """Return the list of ids for ``text``, one id per character."""
        # Resolve the fallback id once instead of once per character.
        unk_id = self.stoi.get(self.unk_token, 0)
        return [self.stoi.get(ch, unk_id) for ch in text]

    def decode(self, ids):
        """Return the string formed by joining the tokens for ``ids``.

        Ids outside ``0 <= i < len(itos)`` decode to the unknown token.
        """
        vocab_size = len(self.itos)
        # The lower bound matters: a negative id would otherwise wrap around
        # and silently pick a token from the end of the table.
        return "".join(
            self.itos[i] if 0 <= i < vocab_size else self.unk_token
            for i in ids
        )
|
| 83 |
+
# Build the character tokenizer from the stoi / itos lookup tables.
tokenizer = SimpleTokenizer(stoi, itos)

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Load the checkpoint from the Hub repo in float32; trust_remote_code=True
# lets the model code shipped inside the repo be executed.
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.float32, trust_remote_code=True
)
# Move the weights to the chosen device and switch to evaluation mode.
model.to(device)
model.eval()
|
| 93 |
+
# Gradio function
|
| 94 |
+
def complete_sentence(prompt, max_new_tokens=50, temperature=0.7):
    """Generate a sampled completion for ``prompt`` with the loaded model.

    Args:
        prompt: Text to encode and feed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.

    Returns:
        The decoded output sequence returned by ``model.generate``, or an
        empty string when ``prompt`` encodes to no tokens.
    """
    token_ids = tokenizer.encode(prompt)
    if not token_ids:
        # An empty prompt would become a (1, 0) float tensor, which is not
        # a usable batch of input ids, so there is nothing to complete.
        return ""

    # Build the batch of one directly on the target device with an explicit
    # integer dtype instead of relying on dtype inference.
    input_ids = torch.tensor([token_ids], dtype=torch.long, device=device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            # UI sliders may deliver floats; the token budget must be an int.
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=float(temperature),
        )
    return tokenizer.decode(outputs[0].tolist())
|
| 104 |
+
# Launch Gradio app
|
| 105 |
+
# Input widgets, listed in the same order as complete_sentence's parameters.
prompt_box = gr.Textbox(label="Prompt")
max_tokens_slider = gr.Slider(10, 200, value=50, step=10, label="Max new tokens")
temperature_slider = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")

# Wire the widgets to the completion function and start the web UI.
demo = gr.Interface(
    fn=complete_sentence,
    inputs=[prompt_box, max_tokens_slider, temperature_slider],
    outputs=gr.Textbox(label="Completed Text"),
    title="Mocha Sentence Completion",
    description="Enter a prompt and get AI completions from your model.",
)
demo.launch()
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
```
|
| 118 |
|
| 119 |
---
|