Nav772 committed
Commit 7b20059 · verified · 1 Parent(s): 60a5c82

Update app.py

Files changed (1)
  1. app.py +18 -14
app.py CHANGED
@@ -14,28 +14,33 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 class BasicAgent:
     def __init__(self):
-        print("Zephyr Local Agent initialized.")
+        print("Mistral Agent loading on CPU...")
 
-        model_id = "HuggingFaceH4/zephyr-7b-beta"
+        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
 
-        # Load model and tokenizer
+        # Load tokenizer and model
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.model = AutoModelForCausalLM.from_pretrained(model_id)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto",       # Will default to CPU
+            low_cpu_mem_usage=True,  # Helps a bit
+            torch_dtype="auto"
+        )
 
-        # Create generation pipeline
+        # Create pipeline (CPU-only)
         self.pipeline = pipeline(
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
-            device=-1  # CPU
+            device=-1  # force CPU
         )
 
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        print(f"Agent received question: {question[:50]}...")
 
         try:
-            # Format for chat-style prompt
-            prompt = f"<|system|>You are a helpful assistant.<|user|>{question.strip()}<|assistant|>"
+            # Format with instruction template
+            prompt = f"<s>[INST] {question.strip()} [/INST]"
 
             output = self.pipeline(
                 prompt,
@@ -45,13 +50,12 @@ class BasicAgent:
                 top_p=0.95
             )
 
-            # Return model's response only (strip off prompt)
-            generated_text = output[0]["generated_text"]
-            response = generated_text.split("<|assistant|>")[-1].strip()
-            return response
+            full_response = output[0]["generated_text"]
+            answer = full_response.split("[/INST]")[-1].strip()
 
+            return answer
         except Exception as e:
-            print(f"❌ Error during model inference: {e}")
+            print(f"❌ Mistral error: {e}")
             return f"❌ Model Error: {str(e)}"
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
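
Note on the new loading path (a sketch, not part of the commit): device_map="auto" requires the accelerate package, and in some transformers versions a model loaded through a device map cannot also be given a device argument when the pipeline is built, so the device=-1 line may be redundant or rejected. A minimal CPU-only variant under those assumptions:

    # Hedged sketch: same loading pattern as the commit, CPU-only.
    # Requires `accelerate` for device_map="auto".
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    model_id = "mistralai/Mistral-7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",       # resolves to CPU when no GPU is visible
        low_cpu_mem_usage=True,  # loads weights incrementally to cut peak RAM
        torch_dtype="auto",      # keep the checkpoint's stored dtype
    )
    # No device= argument here: placement is already handled by the device map.
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)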
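The hand-built "<s>[INST] ... [/INST]" string matches Mistral's instruction format, but hard-coding it is brittle if the model changes again. A hedged alternative (not in this commit) is to let the tokenizer render its own chat template:

    # Sketch: build the prompt from the model's chat template instead of
    # hard-coding "[INST]" markers.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
    messages = [{"role": "user", "content": "What is the capital of France?"}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # For this model the rendered string should be equivalent to
    # "<s>[INST] What is the capital of France? [/INST]".
    print(prompt)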
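Similarly, the split on "[/INST]" strips the echoed prompt by hand; the text-generation pipeline can return only the completion via return_full_text=False, which removes the string surgery. A sketch with assumed generation parameters, since the diff omits the hunk that sets them:

    # Sketch: let the pipeline drop the echoed prompt itself.
    from transformers import pipeline

    pipe = pipeline(
        "text-generation",
        model="mistralai/Mistral-7B-Instruct-v0.1",
        device=-1,  # CPU, as in the commit
    )
    out = pipe(
        "<s>[INST] What is the capital of France? [/INST]",
        max_new_tokens=64,       # assumed value
        do_sample=True,
        top_p=0.95,
        return_full_text=False,  # return only newly generated text
    )
    print(out[0]["generated_text"].strip())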