Nav772 committed
Commit c248357 · verified · 1 Parent(s): 4d07c90

Update app.py

Files changed (1)
  1. app.py +21 -12
app.py CHANGED

@@ -14,14 +14,20 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 class BasicAgent:
     def __init__(self):
-        print("Falcon-RW-1B Agent initialized.")
-        model_id = "tiiuae/falcon-rw-1b"
+        print("Mistral Agent loading on CPU...")
+
+        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
 
         # Load tokenizer and model
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto",       # Will default to CPU
+            low_cpu_mem_usage=True,  # Helps a bit
+            torch_dtype="auto"
+        )
 
-        # Create a CPU-based pipeline
+        # Create pipeline (CPU-only)
         self.pipeline = pipeline(
             "text-generation",
             model=self.model,
@@ -33,22 +39,25 @@ class BasicAgent:
         print(f"Agent received question: {question[:50]}...")
 
         try:
-            prompt = f"Question: {question.strip()}\nAnswer:"
+            # Format with instruction template
+            prompt = f"<s>[INST] {question.strip()} [/INST]"
+
             output = self.pipeline(
                 prompt,
-                max_new_tokens=96,  # 🔽 Lowered from 128
-                temperature=0.5,
-                top_p=0.8
+                max_new_tokens=256,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95
             )
 
             full_response = output[0]["generated_text"]
-            answer = full_response.split("Answer:")[-1].strip()
-            return answer
+            answer = full_response.split("[/INST]")[-1].strip()
 
+            return answer
         except Exception as e:
-            print(f"❌ Falcon error: {e}")
+            print(f"❌ Mistral error: {e}")
             return f"❌ Model Error: {str(e)}"
-
+
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
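
For context, below is a minimal, self-contained sketch of the generation path this commit sets up, runnable outside the Space. It is not part of the commit itself: the question string and standalone script layout are illustrative, and it assumes a transformers release recent enough (v4.34+) to ship tokenizer.apply_chat_template, which renders the same "<s>[INST] ... [/INST]" Mistral-Instruct prompt the diff builds by hand.

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# Mirror the loading arguments from the diff.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",       # falls back to CPU when no GPU is visible
    low_cpu_mem_usage=True,  # lowers peak RAM while loading weights
    torch_dtype="auto",
)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

question = "What is the capital of France?"  # hypothetical example input

# apply_chat_template renders the model's own chat template, which for
# Mistral-7B-Instruct-v0.1 matches the "<s>[INST] ... [/INST]" format
# written manually in the diff.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": question}],
    tokenize=False,
    add_generation_prompt=True,
)

output = generator(
    prompt,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
)

# Same post-processing as the diff: keep only the text after the prompt.
answer = output[0]["generated_text"].split("[/INST]")[-1].strip()
print(answer)

One caveat on the loading flags: low_cpu_mem_usage=True and device_map="auto" reduce peak memory during weight loading, but they do not speed up inference, so a 7B model on a CPU-only Space will still generate slowly.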