theguywhosucks committed on
Commit
69f32b6
·
verified ·
1 Parent(s): 82e1d8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -36
app.py CHANGED
@@ -1,55 +1,55 @@
 
1
  import torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import gradio as gr
4
 
5
  # -----------------------------
6
- # Device setup
7
  # -----------------------------
8
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
9
 
10
  # -----------------------------
11
- # Load tokenizer and model
12
  # -----------------------------
13
- local_repo = "./" # Folder with config.json, tokenizer.json, etc.
14
- tokenizer = AutoTokenizer.from_pretrained(local_repo)
15
- model = AutoModelForCausalLM.from_pretrained(local_repo, trust_remote_code=True)
16
- model.to(device)
 
 
 
 
 
 
 
 
 
 
 
17
  model.eval()
18
 
19
  # -----------------------------
20
- # Safe generation function
21
  # -----------------------------
22
- def complete_sentence(prompt, max_new_tokens=50, temperature=0.7):
23
- # Encode input
24
- inputs = tokenizer(prompt, return_tensors="pt").to(device)
25
-
26
- # Clamp input IDs to vocab size (extra safety)
27
- inputs['input_ids'] = inputs['input_ids'].clamp(0, model.config.vocab_size - 1)
28
-
29
- # Generate output
30
  with torch.no_grad():
31
- outputs = model.generate(
32
- **inputs,
33
- max_new_tokens=max_new_tokens,
34
- do_sample=True,
35
- temperature=temperature,
36
- pad_token_id=model.config.eos_token_id
37
- )
38
-
39
- # Decode safely
40
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
41
 
42
  # -----------------------------
43
- # Launch Gradio app
44
  # -----------------------------
45
  gr.Interface(
46
  fn=complete_sentence,
47
- inputs=[
48
- gr.Textbox(label="Prompt"),
49
- gr.Slider(10, 200, value=50, step=10, label="Max new tokens"),
50
- gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
51
- ],
52
- outputs=gr.Textbox(label="Completed Text"),
53
- title="MochaV2 Sentence Completion",
54
- description="Enter a prompt and get AI completions from your local MochaV2 model."
55
  ).launch()
 
1
+ import json
2
  import torch
3
+ from transformers import GPT2LMHeadModel, GPT2Config
4
  import gradio as gr
5
 
6
  # -----------------------------
7
+ # Load tokenizer manually
8
  # -----------------------------
9
# Character-level vocabulary: vocab.json maps each character to an integer id.
with open("vocab.json", "r") as vocab_file:
    stoi = json.load(vocab_file)
# Inverse table (id -> character) used when decoding generated ids back to text.
itos = {token_id: char for char, token_id in stoi.items()}
12
+
13
def encode(text):
    """Map each character of *text* to its vocab id (unknown chars become 0)."""
    return [stoi[char] if char in stoi else 0 for char in text]
15
+
16
def decode(ids):
    """Map ids back to characters and concatenate; unknown ids are dropped."""
    chars = (itos.get(token_id, "") for token_id in ids)
    return "".join(chars)
18
 
19
  # -----------------------------
20
+ # Load model manually
21
  # -----------------------------
22
# -----------------------------
# Load model manually
# -----------------------------
# Rebuild the GPT-2 architecture from the checkpoint's own config.json rather
# than using AutoModel.from_pretrained — avoids trust_remote_code and works
# with a bare state-dict checkpoint.
with open("config.json") as f:
    cfg = json.load(f)

# Only the architectural fields are forwarded; every other GPT2Config option
# keeps its library default (including eos_token_id, used later by generate()).
config = GPT2Config(
    vocab_size=cfg["vocab_size"],
    n_positions=cfg["n_positions"],
    n_ctx=cfg["n_ctx"],
    n_embd=cfg["n_embd"],
    n_layer=cfg["n_layer"],
    n_head=cfg["n_head"],
    activation_function=cfg["activation_function"]
)

model = GPT2LMHeadModel(config)
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoints here; consider weights_only=True on newer torch versions.
# map_location="cpu" keeps loading device-independent.
model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
model.eval()  # inference only: disables dropout etc.
38
 
39
  # -----------------------------
40
+ # Generation
41
  # -----------------------------
42
def complete_sentence(prompt, max_new_tokens=50):
    """Complete *prompt* with up to ``max_new_tokens`` generated tokens.

    Args:
        prompt: Seed text; each character is looked up in the vocab via
            ``encode`` (unknown characters fall back to id 0).
        max_new_tokens: Cap on the number of generated tokens. Gradio
            sliders can deliver floats, so the value is coerced to int
            before being passed to ``generate``.

    Returns:
        The decoded text (prompt plus continuation); "" for an empty prompt.
    """
    if not prompt:
        # generate() cannot start from an empty (1, 0) id tensor.
        return ""
    input_ids = torch.tensor([encode(prompt)], dtype=torch.long)
    with torch.no_grad():  # no gradients needed for inference
        output_ids = model.generate(
            input_ids,
            max_new_tokens=int(max_new_tokens),
            pad_token_id=config.eos_token_id,
        )
    return decode(output_ids[0].tolist())
 
 
 
 
 
 
 
 
47
 
48
  # -----------------------------
49
+ # Gradio app
50
  # -----------------------------
51
# Build the UI first, then launch — keeps the Interface construction readable.
demo = gr.Interface(
    fn=complete_sentence,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(10, 200, value=50, step=10, label="Max tokens"),
    ],
    outputs=gr.Textbox(label="Completed Text"),
)
demo.launch()