Nav772 committed
Commit c248357 · verified · 1 Parent(s): 4d07c90

Update app.py

Files changed (1)
  1. app.py +21 -12
app.py CHANGED

@@ -14,14 +14,20 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 class BasicAgent:
     def __init__(self):
-        print("Falcon-RW-1B Agent initialized.")
-        model_id = "tiiuae/falcon-rw-1b"
+        print("Mistral Agent loading on CPU...")
+
+        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
 
         # Load tokenizer and model
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto",       # Will default to CPU
+            low_cpu_mem_usage=True,  # Helps a bit
+            torch_dtype="auto"
+        )
 
-        # Create a CPU-based pipeline
+        # Create pipeline (CPU-only)
         self.pipeline = pipeline(
             "text-generation",
             model=self.model,
@@ -33,22 +39,25 @@ class BasicAgent:
         print(f"Agent received question: {question[:50]}...")
 
         try:
-            prompt = f"Question: {question.strip()}\nAnswer:"
+            # Format with instruction template
+            prompt = f"<s>[INST] {question.strip()} [/INST]"
+
             output = self.pipeline(
                 prompt,
-                max_new_tokens=96,  # 🔽 Lowered from 128
-                temperature=0.5,
-                top_p=0.8
+                max_new_tokens=256,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95
             )
 
             full_response = output[0]["generated_text"]
-            answer = full_response.split("Answer:")[-1].strip()
-            return answer
+            answer = full_response.split("[/INST]")[-1].strip()
 
+            return answer
         except Exception as e:
-            print(f"❌ Falcon error: {e}")
+            print(f"❌ Mistral error: {e}")
             return f"❌ Model Error: {str(e)}"
-
+
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
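
For context, below is a minimal, self-contained sketch of the generation path this commit sets up, runnable outside the Space. It is not part of the commit itself: the question string and standalone script layout are illustrative, and it assumes a transformers release recent enough (v4.34+) to ship tokenizer.apply_chat_template, which renders the same "<s>[INST] ... [/INST]" Mistral-Instruct prompt the diff builds by hand.

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# Mirror the loading arguments from the diff.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",       # falls back to CPU when no GPU is visible
    low_cpu_mem_usage=True,  # lowers peak RAM while loading weights
    torch_dtype="auto",
)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

question = "What is the capital of France?"  # hypothetical example input

# apply_chat_template renders the model's own chat template, which for
# Mistral-7B-Instruct-v0.1 matches the "<s>[INST] ... [/INST]" format
# written manually in the diff.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": question}],
    tokenize=False,
    add_generation_prompt=True,
)

output = generator(
    prompt,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
)

# Same post-processing as the diff: keep only the text after the prompt.
answer = output[0]["generated_text"].split("[/INST]")[-1].strip()
print(answer)

One caveat on the loading flags: low_cpu_mem_usage=True and device_map="auto" reduce peak memory during weight loading, but they do not speed up inference, so a 7B model on a CPU-only Space will still generate slowly.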