Nav772 committed (verified)
Commit: c938c57
Parent(s): cd5b89b

Update app.py

Files changed (1): app.py (+10 -19)
app.py CHANGED
@@ -14,20 +14,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 class BasicAgent:
     def __init__(self):
-        print("Mistral Agent loading on CPU...")
-
-        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+        print("Falcon-RW-1B Agent initialized.")
+        model_id = "tiiuae/falcon-rw-1b"
 
         # Load tokenizer and model
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            device_map="auto",       # Will default to CPU
-            low_cpu_mem_usage=True,  # Helps a bit
-            torch_dtype="auto"
-        )
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
 
-        # Create pipeline (CPU-only)
+        # Create a CPU-based pipeline
         self.pipeline = pipeline(
             "text-generation",
             model=self.model,
@@ -39,23 +33,20 @@ class BasicAgent:
         print(f"Agent received question: {question[:50]}...")
 
         try:
-            # Format with instruction template
-            prompt = f"<s>[INST] {question.strip()} [/INST]"
-
+            prompt = f"Question: {question.strip()}\nAnswer:"
             output = self.pipeline(
                 prompt,
-                max_new_tokens=256,
-                do_sample=True,
+                max_new_tokens=128,
                 temperature=0.7,
-                top_p=0.95
+                top_p=0.9
             )
 
             full_response = output[0]["generated_text"]
-            answer = full_response.split("[/INST]")[-1].strip()
-
+            answer = full_response.split("Answer:")[-1].strip()
             return answer
+
         except Exception as e:
-            print(f"❌ Mistral error: {e}")
+            print(f"❌ Falcon error: {e}")
             return f"❌ Model Error: {str(e)}"
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
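
For context, a minimal sketch of how BasicAgent reads once this commit is applied. The hunks do not show the pipeline's remaining constructor arguments or the signature of the answering method, so the tokenizer= argument, the __call__ signature, and the __main__ demo below are assumptions, not part of the commit:

    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


    class BasicAgent:
        def __init__(self):
            print("Falcon-RW-1B Agent initialized.")
            model_id = "tiiuae/falcon-rw-1b"

            # Load tokenizer and model (default fp32 weights, CPU placement)
            self.tokenizer = AutoTokenizer.from_pretrained(model_id)
            self.model = AutoModelForCausalLM.from_pretrained(model_id)

            # Create a CPU-based pipeline
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,  # assumed; this line falls outside the diff hunk
            )

        def __call__(self, question: str) -> str:  # signature assumed; not shown in the diff
            print(f"Agent received question: {question[:50]}...")

            try:
                # Falcon-RW-1B is a base (non-instruct) model, hence the plain
                # "Question:/Answer:" prompt instead of Mistral's [INST] template.
                prompt = f"Question: {question.strip()}\nAnswer:"
                output = self.pipeline(
                    prompt,
                    max_new_tokens=128,
                    temperature=0.7,  # ignored under greedy decoding (do_sample was dropped)
                    top_p=0.9,        # likewise ignored unless sampling is enabled
                )

                # generated_text echoes the prompt, so keep only what follows "Answer:"
                full_response = output[0]["generated_text"]
                answer = full_response.split("Answer:")[-1].strip()
                return answer

            except Exception as e:
                print(f"❌ Falcon error: {e}")
                return f"❌ Model Error: {str(e)}"


    if __name__ == "__main__":  # illustrative only; app.py drives this via Gradio
        agent = BasicAgent()
        print(agent("What is the capital of France?"))

One side effect worth noting: the commit removes do_sample=True but keeps temperature=0.7 and top_p=0.9. Unless the model's generation config enables sampling, transformers falls back to greedy decoding, and recent versions warn that both settings are ignored.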