Nav772 committed · Commit 60a5c82 · verified · Parent(s): fb6f0ce

Update app.py

Files changed (1): app.py (+18 -10)
app.py CHANGED
@@ -14,32 +14,40 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 class BasicAgent:
     def __init__(self):
-        print("Mistral Local Agent initialized.")
+        print("Zephyr Local Agent initialized.")
 
-        model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+        model_id = "HuggingFaceH4/zephyr-7b-beta"
 
-        # Load model and tokenizer directly
+        # Load model and tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
         self.model = AutoModelForCausalLM.from_pretrained(model_id)
 
-        # Create inference pipeline
+        # Create generation pipeline
         self.pipeline = pipeline(
             "text-generation",
             model=self.model,
             tokenizer=self.tokenizer,
-            device=-1
+            device=-1  # CPU
         )
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
 
         try:
-            prompt = f"<s>[INST] {question.strip()} [/INST]"
-            output = self.pipeline(prompt, max_new_tokens=256, temperature=0.7)
-
-            # Extract and clean the response
+            # Format for chat-style prompt
+            prompt = f"<|system|>You are a helpful assistant.<|user|>{question.strip()}<|assistant|>"
+
+            output = self.pipeline(
+                prompt,
+                max_new_tokens=256,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95
+            )
+
+            # Return model's response only (strip off prompt)
             generated_text = output[0]["generated_text"]
-            response = generated_text.split("[/INST]")[-1].strip()
+            response = generated_text.split("<|assistant|>")[-1].strip()
             return response
 
         except Exception as e:
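
A note on the new prompt string: zephyr-7b-beta's actual chat template separates turns with newlines and </s> end-of-sequence tokens, which the hand-built f-string above omits; the model may still respond to the looser format, but the tokenizer can emit the exact training-time layout. A minimal sketch (not part of this commit; the question text is a placeholder):

# Sketch: build the Zephyr prompt via the tokenizer's chat template
# instead of a hand-written f-string.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},  # placeholder
]
# add_generation_prompt=True appends the trailing <|assistant|> header so the
# model continues with an answer rather than starting a new user turn.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

Splitting the output on "<|assistant|>" still works with this prompt, since that header appears exactly once, at the end.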
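
On the new sampling flags: do_sample=True is what enables temperature and top_p; under the default greedy decoding those parameters have no effect. A hypothetical smoke test for the updated agent (CPU inference of a 7B model, so expect it to be slow):

agent = BasicAgent()
print(agent("What is the capital of France?"))  # placeholder question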