Nav772 committed
Commit 7b20059 · verified · 1 Parent(s): 60a5c82

Update app.py

Files changed (1)
  1. app.py +18 -14
app.py CHANGED
@@ -14,28 +14,33 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 class BasicAgent:
     def __init__(self):
-        print("Zephyr Local Agent initialized.")
+        print("Mistral Agent loading on CPU...")
 
-        model_id = "HuggingFaceH4/zephyr-7b-beta"
+        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
 
-        # Load model and tokenizer
+        # Load tokenizer and model
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto",       # Will default to CPU
+            low_cpu_mem_usage=True,  # Helps a bit
+            torch_dtype="auto"
+        )
 
-        # Create generation pipeline
+        # Create pipeline (CPU-only)
         self.pipeline = pipeline(
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
-            device=-1  # CPU
+            device=-1  # force CPU
         )
 
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        print(f"Agent received question: {question[:50]}...")
 
         try:
-            # Format for chat-style prompt
-            prompt = f"<|system|>You are a helpful assistant.<|user|>{question.strip()}<|assistant|>"
+            # Format with instruction template
+            prompt = f"<s>[INST] {question.strip()} [/INST]"
 
             output = self.pipeline(
                 prompt,
@@ -45,13 +50,12 @@ class BasicAgent:
                 top_p=0.95
             )
 
-            # Return model's response only (strip off prompt)
-            generated_text = output[0]["generated_text"]
-            response = generated_text.split("<|assistant|>")[-1].strip()
-            return response
+            full_response = output[0]["generated_text"]
+            answer = full_response.split("[/INST]")[-1].strip()
 
+            return answer
         except Exception as e:
-            print(f"❌ Error during model inference: {e}")
+            print(f"❌ Mistral error: {e}")
             return f"❌ Model Error: {str(e)}"
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
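
Note on the new loading path (a sketch, not part of the commit): device_map="auto" requires the accelerate package, and in some transformers versions a model loaded through a device map cannot also be given a device argument when the pipeline is built, so the device=-1 line may be redundant or rejected. A minimal CPU-only variant under those assumptions:

    # Hedged sketch: same loading pattern as the commit, CPU-only.
    # Requires `accelerate` for device_map="auto".
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    model_id = "mistralai/Mistral-7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",       # resolves to CPU when no GPU is visible
        low_cpu_mem_usage=True,  # loads weights incrementally to cut peak RAM
        torch_dtype="auto",      # keep the checkpoint's stored dtype
    )
    # No device= argument here: placement is already handled by the device map.
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)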
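The hand-built "<s>[INST] ... [/INST]" string matches Mistral's instruction format, but hard-coding it is brittle if the model changes again. A hedged alternative (not in this commit) is to let the tokenizer render its own chat template:

    # Sketch: build the prompt from the model's chat template instead of
    # hard-coding "[INST]" markers.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
    messages = [{"role": "user", "content": "What is the capital of France?"}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # For this model the rendered string should be equivalent to
    # "<s>[INST] What is the capital of France? [/INST]".
    print(prompt)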
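Similarly, the split on "[/INST]" strips the echoed prompt by hand; the text-generation pipeline can return only the completion via return_full_text=False, which removes the string surgery. A sketch with assumed generation parameters, since the diff omits the hunk that sets them:

    # Sketch: let the pipeline drop the echoed prompt itself.
    from transformers import pipeline

    pipe = pipeline(
        "text-generation",
        model="mistralai/Mistral-7B-Instruct-v0.1",
        device=-1,  # CPU, as in the commit
    )
    out = pipe(
        "<s>[INST] What is the capital of France? [/INST]",
        max_new_tokens=64,       # assumed value
        do_sample=True,
        top_p=0.95,
        return_full_text=False,  # return only newly generated text
    )
    print(out[0]["generated_text"].strip())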