Final_Assignment_Template

Sleeping

Davit6174 committed on Jun 17, 2025

Commit

b277f85

verified ·

1 Parent(s): 57d055c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -23,27 +23,31 @@ class BasicAgent:
 class ZephyrAgent:
     def __init__(self):
-        model_id = "HuggingFaceH4/zephyr-7b-beta"
-        print(f"Loading model: {model_id}")
-        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.pipeline = pipeline(
-            "text-generation",
-            model=model_id,
-            tokenizer=self.tokenizer,
-            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
-            device=0 if torch.cuda.is_available() else -1,
-            max_new_tokens=512,
-            temperature=0.7,
-            top_p=0.9,
-        )
-        print("✅ ZephyrAgent initialized.")
     def __call__(self, question: str) -> str:
-        prompt = f"<|system|>\nYou are a helpful AI assistant.\n<|user|>\n{question}\n<|assistant|>\n"
-        print(f"🧠 Prompting ZephyrAgent:\n{prompt[:100]}...")
-        response = self.pipeline(prompt, return_full_text=False)
-        generated_text = response[0]['generated_text'].strip()
-        return generated_text
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 class ZephyrAgent:
     def __init__(self):
+        self.api_url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
+        self.headers = {
+            "Authorization": f"Bearer {os.getenv('HF_TOKEN')}"
+        }
+        print("ZephyrAPI initialized using Inference API.")
     def __call__(self, question: str) -> str:
+        prompt = f"<|system|>\nYou are a helpful assistant.\n<|user|>\n{question}\n<|assistant|>\n"
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_new_tokens": 256,
+                "temperature": 0.7,
+                "top_p": 0.9,
+            }
+        }
+        try:
+            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=60)
+            response.raise_for_status()
+            result = response.json()
+            return result[0]["generated_text"].split("<|assistant|>")[-1].strip()
+        except Exception as e:
+            print(f"Error: {e}")
+            return "⚠️ Model could not respond. Check API access or token."
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """