theguywhosucks committed on
Commit
b9ebb9a
·
verified ·
1 Parent(s): a4dc3b9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+
5
# Hugging Face Hub repo that hosts the model weights (safetensors preferred).
repo_id = "theguywhosucks/mochaV2"

# Slow (pure-Python) tokenizer pulled straight from the hub —
# no hand-rolled itos/stoi tables needed.
tokenizer = AutoTokenizer.from_pretrained(repo_id, use_fast=False)

# Prefer a GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model in float32 (swap to torch.float16 for faster GPU inference),
# move it to the chosen device, and switch to inference mode in one pass.
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.float32,
    trust_remote_code=True,
).to(device)
model.eval()
20
+
21
# Gradio inference function
def complete_sentence(prompt, max_new_tokens=50, temperature=0.7):
    """Generate a continuation of *prompt* with the loaded causal LM.

    Args:
        prompt: Input text to complete.
        max_new_tokens: Maximum number of tokens to generate (default 50).
        temperature: Sampling temperature; higher means more random (default 0.7).

    Returns:
        The prompt plus its generated continuation, decoded to a string
        with special tokens stripped.
    """
    # Guard clause: an empty prompt can yield zero-length input ids,
    # which makes generate() fail or produce nothing useful.
    if not prompt:
        return ""
    # Keep the full encoding (input_ids AND attention_mask) so generate()
    # can tell real tokens from padding — avoids the "attention mask is
    # not set" warning and degraded output when pad and eos coincide.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **encoded,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            # Fall back to EOS for padding when the tokenizer defines no pad token.
            pad_token_id=(
                tokenizer.pad_token_id
                if tokenizer.pad_token_id is not None
                else tokenizer.eos_token_id
            ),
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
32
+
33
# Build the Gradio UI once, under a name, and launch it only when the file
# is executed as a script — importing this module (e.g. from a test or
# another app) no longer starts a web server as a side effect.
demo = gr.Interface(
    fn=complete_sentence,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(10, 200, value=50, step=10, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Completed Text"),
    title="Mocha Sentence Completion",
    description="Enter a prompt and get AI completions from your model.",
)

if __name__ == "__main__":
    demo.launch()