cat4laugh committed on
Commit
5f63a02
·
verified ·
1 Parent(s): 23334ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -6,24 +6,22 @@ import torch
6
  model_name = "haykgrigorian/TimeCapsuleLLM-v2-llama-1.2B"
7
 
8
  # 2. Load the Model and Tokenizer
9
- # We use device_map="auto" to use available CPU RAM efficiently
10
  print("Loading model... this usually takes 1-2 minutes on first run.")
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
  model = AutoModelForCausalLM.from_pretrained(model_name)
13
 
14
  # 3. Define the Generate Function
15
  def generate_text(prompt, max_tokens=100, temperature=0.7):
16
- # Format inputs
17
- inputs = tokenizer(prompt, return_tensors="pt")
18
 
19
  # Generate
20
- # We disable gradients to save memory and speed up inference
21
  with torch.no_grad():
22
  outputs = model.generate(
23
  **inputs,
24
  max_new_tokens=int(max_tokens),
25
  temperature=float(temperature),
26
- do_sample=True, # Allows for creativity/temperature
27
  pad_token_id=tokenizer.eos_token_id
28
  )
29
 
@@ -31,7 +29,6 @@ def generate_text(prompt, max_tokens=100, temperature=0.7):
31
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
32
 
33
  # 4. Launch the Gradio Interface
34
- # This creates the UI and the API endpoint automatically
35
  iface = gr.Interface(
36
  fn=generate_text,
37
  inputs=[
 
6
  model_name = "haykgrigorian/TimeCapsuleLLM-v2-llama-1.2B"
7
 
8
  # 2. Load the Model and Tokenizer
 
9
  print("Loading model... this usually takes 1-2 minutes on first run.")
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
  model = AutoModelForCausalLM.from_pretrained(model_name)
12
 
13
  # 3. Define the Generate Function
14
  def generate_text(prompt, max_tokens=100, temperature=0.7):
15
+ # FIX: We added return_token_type_ids=False to stop the error
16
+ inputs = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False)
17
 
18
  # Generate
 
19
  with torch.no_grad():
20
  outputs = model.generate(
21
  **inputs,
22
  max_new_tokens=int(max_tokens),
23
  temperature=float(temperature),
24
+ do_sample=True,
25
  pad_token_id=tokenizer.eos_token_id
26
  )
27
 
 
29
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
30
 
31
  # 4. Launch the Gradio Interface
 
32
  iface = gr.Interface(
33
  fn=generate_text,
34
  inputs=[