lap096 committed on
Commit
06df239
Β·
verified Β·
1 Parent(s): 6e46416

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -2,9 +2,10 @@ import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
- MODEL_NAME = "distilgpt2" # tiny model, works on free CPU Spaces
 
6
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
7
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
8
 
9
  SYSTEM_PROMPT = """
10
  You are agent_1, a single AI system.
@@ -20,36 +21,42 @@ Agent_1:
20
  ###
21
  """
22
 
23
- def respond(user_input):
24
- # Keep prompt short to avoid memory issues
25
  prompt = SYSTEM_PROMPT + f"User: {user_input}\nAgent_1:\n"
26
 
27
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=200)
28
 
29
  with torch.no_grad():
30
  outputs = model.generate(
31
  **inputs,
32
- max_new_tokens=50, # short output to prevent crashes
33
  do_sample=True,
34
  temperature=0.7,
35
  pad_token_id=tokenizer.eos_token_id
36
  )
37
 
38
- text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
39
 
40
- # Parse thoughts and response
41
- if "[Response]" in text:
42
- parts = text.split("[Response]")
43
  thoughts = parts[0].strip()
44
- response = parts[1].strip()
45
  else:
46
- thoughts = "[Thoughts] Thinking..."
47
- response = text.strip()
48
 
49
  return f"{thoughts}\n[Response] {response}"
50
 
51
- gr.ChatInterface(
 
52
  fn=respond,
53
  title="agent_1 Hugging Face Space",
54
  description="Tiny AI with simulated internal thoughts. Fully local, runs on free CPU."
55
- ).launch()
 
 
 
 
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
# Load model and tokenizer once at import time.
# distilgpt2 is small enough to run on a free CPU Space.
MODEL_NAME = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# low_cpu_mem_usage reduces peak RAM while the weights are loaded.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, low_cpu_mem_usage=True)
9
 
10
  SYSTEM_PROMPT = """
11
  You are agent_1, a single AI system.
 
21
  ###
22
  """
23
 
24
def respond(user_input, history):
    """Generate one chat turn and format it as thoughts plus a response.

    Args:
        user_input: The latest user message from the chat box.
        history: Prior turns supplied by gr.ChatInterface. Unused — each
            turn is answered statelessly from SYSTEM_PROMPT alone.

    Returns:
        A single string of the form "<thoughts>\n[Response] <response>".
    """
    # Construct the specific prompt for this turn.
    prompt = SYSTEM_PROMPT + f"User: {user_input}\nAgent_1:\n"

    # Truncate long prompts so the tiny model stays within a safe context size.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=60,  # short generations keep free-CPU latency tolerable
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Slicing the token ids is exact;
    # the previous approach (slicing the decoded string by the length of the
    # re-decoded prompt) breaks whenever detokenization does not reproduce the
    # prompt text byte-for-byte.
    prompt_len = inputs["input_ids"].shape[1]
    new_text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    # Parse the completion into thoughts and response; anything after a
    # hallucinated follow-up "User:" turn is discarded.
    if "[Response]" in new_text:
        thoughts, _, rest = new_text.partition("[Response]")
        thoughts = thoughts.strip()
        response = rest.split("User:")[0].strip()
    else:
        thoughts = "[Thoughts] Processing..."
        response = new_text.split("User:")[0].strip()

    return f"{thoughts}\n[Response] {response}"
53
 
54
# Launch the interface.
# NOTE(review): keep the top-level name `demo` — Hugging Face Spaces appears
# to serve a module-level `demo` object; confirm against the Space's SDK
# configuration before renaming.
demo = gr.ChatInterface(
    fn=respond,
    title="agent_1 Hugging Face Space",
    description="Tiny AI with simulated internal thoughts. Fully local, runs on free CPU."
)

# Only launch when executed directly (not when imported by the Spaces runtime).
if __name__ == "__main__":
    demo.launch()