Nav772 committed · Commit 60a5c82 · verified · Parent(s): fb6f0ce

Update app.py

Files changed (1): app.py (+18 -10)
app.py CHANGED
@@ -14,32 +14,40 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 class BasicAgent:
     def __init__(self):
-        print("Mistral Local Agent initialized.")
+        print("Zephyr Local Agent initialized.")
 
-        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+        model_id = "HuggingFaceH4/zephyr-7b-beta"
 
-        # Load model and tokenizer directly
+        # Load model and tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.model = AutoModelForCausalLM.from_pretrained(model_id)
 
-        # Create inference pipeline
+        # Create generation pipeline
         self.pipeline = pipeline(
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
-            device=-1
+            device=-1  # CPU
         )
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
 
         try:
-            prompt = f"<s>[INST] {question.strip()} [/INST]"
-            output = self.pipeline(prompt, max_new_tokens=256, temperature=0.7)
-
-            # Extract and clean the response
+            # Format for chat-style prompt
+            prompt = f"<|system|>You are a helpful assistant.<|user|>{question.strip()}<|assistant|>"
+
+            output = self.pipeline(
+                prompt,
+                max_new_tokens=256,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95
+            )
+
+            # Return model's response only (strip off prompt)
             generated_text = output[0]["generated_text"]
-            response = generated_text.split("[/INST]")[-1].strip()
+            response = generated_text.split("<|assistant|>")[-1].strip()
             return response
 
         except Exception as e:
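
A note on the new prompt string: zephyr-7b-beta's actual chat template separates turns with newlines and </s> end-of-sequence tokens, which the hand-built f-string above omits; the model may still respond to the looser format, but the tokenizer can emit the exact training-time layout. A minimal sketch (not part of this commit; the question text is a placeholder):

# Sketch: build the Zephyr prompt via the tokenizer's chat template
# instead of a hand-written f-string.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},  # placeholder
]
# add_generation_prompt=True appends the trailing <|assistant|> header so the
# model continues with an answer rather than starting a new user turn.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

Splitting the output on "<|assistant|>" still works with this prompt, since that header appears exactly once, at the end.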
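
On the new sampling flags: do_sample=True is what enables temperature and top_p; under the default greedy decoding those parameters have no effect. A hypothetical smoke test for the updated agent (CPU inference of a 7B model, so expect it to be slow):

agent = BasicAgent()
print(agent("What is the capital of France?"))  # placeholder question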