theguywhosucks committed on
Commit
69f32b6
·
verified ·
1 Parent(s): 82e1d8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -36
app.py CHANGED
@@ -1,55 +1,55 @@
 
1
  import torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import gradio as gr
4
 
5
  # -----------------------------
6
- # Device setup
7
  # -----------------------------
8
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
9
 
10
  # -----------------------------
11
- # Load tokenizer and model
12
  # -----------------------------
13
- local_repo = "./" # Folder with config.json, tokenizer.json, etc.
14
- tokenizer = AutoTokenizer.from_pretrained(local_repo)
15
- model = AutoModelForCausalLM.from_pretrained(local_repo, trust_remote_code=True)
16
- model.to(device)
 
 
 
 
 
 
 
 
 
 
 
17
  model.eval()
18
 
19
  # -----------------------------
20
- # Safe generation function
21
  # -----------------------------
22
- def complete_sentence(prompt, max_new_tokens=50, temperature=0.7):
23
- # Encode input
24
- inputs = tokenizer(prompt, return_tensors="pt").to(device)
25
-
26
- # Clamp input IDs to vocab size (extra safety)
27
- inputs['input_ids'] = inputs['input_ids'].clamp(0, model.config.vocab_size - 1)
28
-
29
- # Generate output
30
  with torch.no_grad():
31
- outputs = model.generate(
32
- **inputs,
33
- max_new_tokens=max_new_tokens,
34
- do_sample=True,
35
- temperature=temperature,
36
- pad_token_id=model.config.eos_token_id
37
- )
38
-
39
- # Decode safely
40
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
41
 
42
  # -----------------------------
43
- # Launch Gradio app
44
  # -----------------------------
45
  gr.Interface(
46
  fn=complete_sentence,
47
- inputs=[
48
- gr.Textbox(label="Prompt"),
49
- gr.Slider(10, 200, value=50, step=10, label="Max new tokens"),
50
- gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
51
- ],
52
- outputs=gr.Textbox(label="Completed Text"),
53
- title="MochaV2 Sentence Completion",
54
- description="Enter a prompt and get AI completions from your local MochaV2 model."
55
  ).launch()
 
1
+ import json
2
  import torch
3
+ from transformers import GPT2LMHeadModel, GPT2Config
4
  import gradio as gr
5
 
6
  # -----------------------------
7
+ # Load tokenizer manually
8
  # -----------------------------
9
# Character-level vocabulary: vocab.json maps each character to an integer id.
with open("vocab.json", "r") as vocab_file:
    stoi = json.load(vocab_file)
# Inverse table (id -> character) used when decoding generated ids back to text.
itos = {token_id: char for char, token_id in stoi.items()}
12
+
13
def encode(text):
    """Map each character of *text* to its vocab id (unknown chars become 0)."""
    return [stoi[char] if char in stoi else 0 for char in text]
15
+
16
def decode(ids):
    """Map ids back to characters and concatenate; unknown ids are dropped."""
    chars = (itos.get(token_id, "") for token_id in ids)
    return "".join(chars)
18
 
19
  # -----------------------------
20
+ # Load model manually
21
  # -----------------------------
22
# -----------------------------
# Load model manually
# -----------------------------
# Rebuild the GPT-2 architecture from the checkpoint's own config.json rather
# than using AutoModel.from_pretrained — avoids trust_remote_code and works
# with a bare state-dict checkpoint.
with open("config.json") as f:
    cfg = json.load(f)

# Only the architectural fields are forwarded; every other GPT2Config option
# keeps its library default (including eos_token_id, used later by generate()).
config = GPT2Config(
    vocab_size=cfg["vocab_size"],
    n_positions=cfg["n_positions"],
    n_ctx=cfg["n_ctx"],
    n_embd=cfg["n_embd"],
    n_layer=cfg["n_layer"],
    n_head=cfg["n_head"],
    activation_function=cfg["activation_function"]
)

model = GPT2LMHeadModel(config)
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoints here; consider weights_only=True on newer torch versions.
# map_location="cpu" keeps loading device-independent.
model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
model.eval()  # inference only: disables dropout etc.
38
 
39
  # -----------------------------
40
+ # Generation
41
  # -----------------------------
42
def complete_sentence(prompt, max_new_tokens=50):
    """Complete *prompt* with up to ``max_new_tokens`` generated tokens.

    Args:
        prompt: Seed text; each character is looked up in the vocab via
            ``encode`` (unknown characters fall back to id 0).
        max_new_tokens: Cap on the number of generated tokens. Gradio
            sliders can deliver floats, so the value is coerced to int
            before being passed to ``generate``.

    Returns:
        The decoded text (prompt plus continuation); "" for an empty prompt.
    """
    if not prompt:
        # generate() cannot start from an empty (1, 0) id tensor.
        return ""
    input_ids = torch.tensor([encode(prompt)], dtype=torch.long)
    with torch.no_grad():  # no gradients needed for inference
        output_ids = model.generate(
            input_ids,
            max_new_tokens=int(max_new_tokens),
            pad_token_id=config.eos_token_id,
        )
    return decode(output_ids[0].tolist())
 
 
 
 
 
 
 
 
47
 
48
  # -----------------------------
49
+ # Gradio app
50
  # -----------------------------
51
# Build the UI first, then launch — keeps the Interface construction readable.
demo = gr.Interface(
    fn=complete_sentence,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(10, 200, value=50, step=10, label="Max tokens"),
    ],
    outputs=gr.Textbox(label="Completed Text"),
)
demo.launch()