theguywhosucks committed
Commit 262bf6d · verified · 1 Parent(s): 6f17340

Update app.py

Files changed (1)
  1. app.py +7 -10
app.py CHANGED
@@ -2,30 +2,28 @@ import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-# HF repo containing your model (with safetensors)
 repo_id = "theguywhosucks/mochaV2"
 
-# Load tokenizer
+# Load the tokenizer from the repo (uses tokenizer.json internally)
 tokenizer = AutoTokenizer.from_pretrained(repo_id, use_fast=False)
 
-# GPT2-style models often don't have a pad token, set it to eos
+# GPT2-style models often don't have a pad token
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 
-# Load model (safetensors automatically used if available)
+# Load the model (safetensors used automatically)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = AutoModelForCausalLM.from_pretrained(
     repo_id,
-    dtype=torch.float32,  # torch_dtype is deprecated; use dtype
+    dtype=torch.float32,  # torch_dtype is deprecated
     trust_remote_code=True
 )
 model.to(device)
 model.eval()
 
-# Gradio completion function
 def complete_sentence(prompt, max_new_tokens=50, temperature=0.7):
-    # Encode input with proper padding
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(device)
+    # Tokenize input safely
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
     with torch.no_grad():
         outputs = model.generate(
@@ -33,10 +31,9 @@ def complete_sentence(prompt, max_new_tokens=50, temperature=0.7):
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        pad_token_id=tokenizer.pad_token_id  # ensures safe embedding lookup
+        pad_token_id=tokenizer.pad_token_id
     )
 
-    # Decode output safely
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 # Launch Gradio app
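The hunk ends at the "# Launch Gradio app" comment, so the launch code itself is not part of this diff. For context, here is a minimal sketch of how the updated complete_sentence could be wired into a Gradio UI; the gr.Interface layout, component labels, slider ranges, and the demo name are illustrative assumptions, not code from the repo.

# Hypothetical launch wiring (not from this commit): exposes the prompt
# plus the two sampling knobs that complete_sentence accepts.
demo = gr.Interface(
    fn=complete_sentence,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(1, 200, value=50, step=1, label="Max new tokens"),
        gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Completion"),
)

if __name__ == "__main__":
    demo.launch()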