theguywhosucks committed
Commit 6f17340 · verified · 1 Parent(s): f3251a3

Update app.py

Files changed (1):
  1. app.py +16 -7
app.py CHANGED
@@ -5,29 +5,38 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 # HF repo containing your model (with safetensors)
 repo_id = "theguywhosucks/mochaV2"

-# Load tokenizer from HF (no manual itos/stoi)
+# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(repo_id, use_fast=False)

-# Load model (safetensors will be used automatically if available)
+# GPT2-style models often lack a pad token; fall back to eos
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+
+# Load model (safetensors automatically used if available)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = AutoModelForCausalLM.from_pretrained(
     repo_id,
-    torch_dtype=torch.float32,  # or torch.float16 for faster GPU inference
+    dtype=torch.float32,  # torch_dtype is deprecated; use dtype
     trust_remote_code=True
 )
 model.to(device)
 model.eval()

-# Gradio function
+# Gradio completion function
 def complete_sentence(prompt, max_new_tokens=50, temperature=0.7):
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+    # Encode input with padding and an attention mask
+    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)
+
     with torch.no_grad():
         outputs = model.generate(
-            input_ids,
+            **inputs,
             max_new_tokens=max_new_tokens,
             do_sample=True,
-            temperature=temperature
+            temperature=temperature,
+            pad_token_id=tokenizer.pad_token_id  # explicit pad id avoids the eos fallback warning
         )
+
+    # Decode the generated sequence
     return tokenizer.decode(outputs[0], skip_special_tokens=True)

 # Launch Gradio app
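The hunk ends at the `# Launch Gradio app` comment, so the UI wiring itself sits below the changed range. For context, here is a minimal sketch of how `complete_sentence` is typically exposed through `gr.Interface`; the component choices and labels below are illustrative assumptions, not the repo's actual code:

```python
import gradio as gr

# Hypothetical wiring for complete_sentence(); only the function itself
# appears in the hunk above, so every component below is an assumption.
demo = gr.Interface(
    fn=complete_sentence,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(1, 200, value=50, step=1, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.05, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Completion"),
)

if __name__ == "__main__":
    demo.launch()
```

One caveat worth noting: `gr.Slider` can deliver its value as a float, so casting `max_new_tokens` to `int` inside `complete_sentence` is a common guard before passing it to `generate`. On Hugging Face Spaces, `demo.launch()` in app.py is picked up automatically.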