Update app.py

app.py (CHANGED)

@@ -4,434 +4,196 @@ import requests
 import pandas as pd
 import re
 from huggingface_hub import InferenceClient
-import time

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

-
-class EnhancedGAIAAgent:
     """
-
-    Focuses on accurate reasoning and proper answer extraction.
     """
-
     def __init__(self):
-        print("
-
-
-
-
         if not hf_token:
-
-
-
-
-
-
-
-
-
-
-
-            # Llama 3.1 8B is fast and good for general tasks
-            self.model = "Qwen/Qwen2.5-Coder-32B-Instruct"
-
-            print(f"✅ Model initialized: {self.model}")
-            print(f"✅ HF Token configured")
-
-        except Exception as e:
-            print(f"❌ Error initializing Inference Client: {e}")
-            self.client = None
-            self.model = None
-
     def __call__(self, question: str) -> str:
-        ""
-
-        """
-        print(f"\n{'='*60}")
-        print(f"Q: {question[:150]}...")
-
-        if self.client is None or self.model is None:
-            error = "ERROR: HF_TOKEN not configured in Space secrets"
-            print(f"A: {error}")
-            return error
-
         try:
-            answer = self.
-            print(f"A: {answer
-            print(f"{'='*60}\n")
             return answer
-
         except Exception as e:
-            print(f"❌
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-

-Be precise and concise!"""
-
-        for attempt in range(max_retries):
-            try:
-                # Try text_generation first (more reliable for simple API)
-                response = self.client.text_generation(
-                    prompt,
-                    model=self.model,
-                    max_new_tokens=512,
-                    temperature=0.1,
-                    do_sample=False,
-                    return_full_text=False,
-                )
-
-                if response:
-                    answer = self._clean_answer(response)
-                    if len(answer) > 0:
-                        return answer
-
-            except Exception as e:
-                print(f"Attempt {attempt + 1} failed: {e}")
-                if attempt < max_retries - 1:
-                    time.sleep(1)
-                    continue
-                else:
-                    # Last resort: try chat completion
-                    try:
-                        messages = [
-                            {"role": "system", "content": "You are a helpful assistant. Answer concisely."},
-                            {"role": "user", "content": question}
-                        ]
-
-                        chat_response = self.client.chat_completion(
-                            messages=messages,
-                            model=self.model,
-                            max_tokens=512,
-                            temperature=0.1,
-                        )
-
-                        if chat_response and chat_response.choices:
-                            answer = chat_response.choices[0].message.content
-                            return self._clean_answer(answer)
-
-                    except Exception as e2:
-                        print(f"Chat completion also failed: {e2}")
-
-        # If all else fails
-        return self._smart_fallback(question)
-
     def _clean_answer(self, text: str) -> str:
         """
-
         """
-        if not text:
-            return ""
-
         text = text.strip()
-
-        # Remove common
-
-            "
-            "
-            "
-            "
-            "Final answer:",
-            "Result:",
         ]
-
-        for
-            if text.lower().startswith(
-                text = text[len(
-
-        #
-        if
-
-
-
-
-
-        ]
-
-
-            match = re.search(pattern, text, re.IGNORECASE)
-            if match:
-                extracted = match.group(1).strip()
-                if 2 < len(extracted) < 100:
-                    return extracted
-
-        # If no pattern matched, take last sentence
-        sentences = text.split('.')
-        if len(sentences) > 1:
-            last_sentence = sentences[-2].strip()
-            if 2 < len(last_sentence) < 100:
-                return last_sentence
-
         return text
-
-    def _smart_fallback(self, question: str) -> str:
-        """
-        Provide intelligent fallback answers based on question analysis.
-        """
-        q_lower = question.lower()
-
-        # Math/calculation questions
-        if any(word in q_lower for word in ["calculate", "compute", "how many", "what is"]):
-            # Try to extract numbers and operators
-            numbers = re.findall(r'-?\d+\.?\d*', question)
-
-            if len(numbers) >= 2:
-                try:
-                    # Simple arithmetic detection
-                    if '+' in question or 'plus' in q_lower or 'sum' in q_lower:
-                        result = float(numbers[0]) + float(numbers[1])
-                        return str(int(result) if result.is_integer() else result)
-                    elif '-' in question or 'minus' in q_lower or 'difference' in q_lower:
-                        result = float(numbers[0]) - float(numbers[1])
-                        return str(int(result) if result.is_integer() else result)
-                    elif '*' in question or 'x' in question or 'times' in q_lower or 'multiply' in q_lower:
-                        result = float(numbers[0]) * float(numbers[1])
-                        return str(int(result) if result.is_integer() else result)
-                    elif '/' in question or 'divide' in q_lower:
-                        result = float(numbers[0]) / float(numbers[1])
-                        return str(int(result) if result.is_integer() else result)
-                    elif '%' in question or 'percent' in q_lower:
-                        # X% of Y
-                        result = (float(numbers[0]) / 100) * float(numbers[1])
-                        return str(int(result) if result.is_integer() else result)
-                except:
-                    pass
-
-        # Year/date questions
-        if any(word in q_lower for word in ["when", "what year", "date"]):
-            # Look for years in the question
-            years = re.findall(r'\b(19\d{2}|20\d{2})\b', question)
-            if years:
-                return years[-1]  # Return most recent year mentioned
-            return "2024"
-
-        # Counting questions
-        if "how many" in q_lower or "count" in q_lower:
-            numbers = re.findall(r'\b\d+\b', question)
-            if numbers:
-                return numbers[0]
-
-        # Default
-        return "Unable to determine answer"


 def run_and_submit_all(profile: gr.OAuthProfile | None):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # 1. Instantiate Agent
-    print("\n" + "="*60)
-    print("INITIALIZING AGENT")
-    print("="*60)
-
-    try:
-        agent = EnhancedGAIAAgent()
-        if agent.client is None or agent.model is None:
-            return """⚠️ SETUP REQUIRED: HF_TOKEN not found!
-
-Steps to fix:
-1. Go to https://huggingface.co/settings/tokens
-2. Create a new token (Read access)
-3. Copy your token
-4. In your Space: Settings → Variables and secrets → New secret
-5. Name: HF_TOKEN
-6. Value: Paste your token
-7. Save and restart Space
-
-The agent cannot run without this token.""", None
-    except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
-
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(f"Agent code: {agent_code}")
-
-    # 2. Fetch Questions
-    print("\n" + "="*60)
-    print("FETCHING QUESTIONS")
-    print("="*60)
-
-    try:
-        response = requests.get(questions_url, timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
-        if not questions_data:
-            return "No questions received from server.", None
-        print(f"✅ Fetched {len(questions_data)} questions")
-    except Exception as e:
-        print(f"❌ Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-
-    # 3. Run Agent on All Questions
-    print("\n" + "="*60)
-    print("RUNNING AGENT ON QUESTIONS")
-    print("="*60)
-
-    results_log = []
     answers_payload = []
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                "Answer": submitted_answer[:80] + "..." if len(submitted_answer) > 80 else submitted_answer
-            })
-
-        except Exception as e:
-            print(f"❌ Error on task {task_id}: {e}")
-            error_answer = "Error processing question"
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": error_answer
-            })
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text[:80] + "...",
-                "Answer": error_answer
-            })
-
-    if not answers_payload:
-        return "No answers generated.", pd.DataFrame(results_log)
-
-    # 4. Submit Results
-    print("\n" + "="*60)
-    print("SUBMITTING RESULTS")
-    print("="*60)
-
-    submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
         "answers": answers_payload
     }
-
-    print(f"Submitting {len(answers_payload)} answers for {username}...")

-
-
-
-
-
-
-
-
-
-
-
-        )
-
-        print(f"\n✅ {final_status}")
-        return final_status, pd.DataFrame(results_log)
-
-    except Exception as e:
-        error_msg = f"Submission failed: {e}"
-        print(f"❌ {error_msg}")
-        return error_msg, pd.DataFrame(results_log)


-# --- Gradio Interface ---
-with gr.Blocks(title="GAIA Agent Evaluation") as demo:
-    gr.Markdown("# 🤗 GAIA Benchmark Agent")
     gr.Markdown(
         """
-
-
-
-
-
-
-
-
-        - Wait 5-10 minutes
-        - Get your score!
-
-        **Target:** 30%+ to pass ✅
         """
     )

     gr.LoginButton()
-
-
-
-
-
-
-        interactive=False
-    )
-
-    results_table = gr.DataFrame(
-        label="Results",
-        wrap=True
-    )

-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )

 if __name__ == "__main__":
-
-    print(" "*20 + "GAIA AGENT STARTING")
-    print("="*70)
-
-    space_host = os.getenv("SPACE_HOST")
-    space_id = os.getenv("SPACE_ID")
-    hf_token = os.getenv("HF_TOKEN")
-
-    if space_host:
-        print(f"✅ Space Host: {space_host}")
-    if space_id:
-        print(f"✅ Space ID: {space_id}")
-    if hf_token:
-        print(f"✅ HF_TOKEN: Found")
-    else:
-        print(f"⚠️ HF_TOKEN: NOT FOUND - Please add to Space secrets!")
-
-    print("="*70 + "\n")
-
-    demo.launch(debug=True, share=False)
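Many of the removed lines above survive only as fragments, but taken together the old EnhancedGAIAAgent tried huggingface_hub's text_generation first, retried on failure, then fell back to chat_completion and finally to keyword heuristics. A rough standalone sketch of that retry loop (the function name, the max_retries default, and the prompt shape are assumptions; the call parameters are copied from the removed lines):

import time
from huggingface_hub import InferenceClient

def old_style_generate(client: InferenceClient, model: str, question: str, max_retries: int = 3) -> str:
    # Mirrors the removed retry loop: text_generation first, give up after a few attempts.
    prompt = question + "\nBe precise and concise!"  # assumed prompt shape
    for attempt in range(max_retries):
        try:
            response = client.text_generation(
                prompt,
                model=model,
                max_new_tokens=512,
                temperature=0.1,
                do_sample=False,
                return_full_text=False,
            )
            if response:
                return response.strip()
        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(1)
    return "Unable to determine answer"

The replacement agent below drops the retries, the text_generation path, and the heuristics in favor of a single chat_completion call with a strict system prompt.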
 import pandas as pd
 import re
 from huggingface_hub import InferenceClient

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

+
+# =========================
+# GAIA OPTIMIZED AGENT
+# =========================
+class GAIAAgent:
     """
+    GAIA benchmark agent – chat-only, nscale-safe, exact answers.
     """
+
     def __init__(self):
+        print("🚀 GAIAAgent initializing...")
+
+        hf_token = (
+            os.getenv("HF_TOKEN")
+            or os.getenv("HUGGING_FACE_HUB_TOKEN")
+            or os.getenv("HF_API_TOKEN")
+        )
+
         if not hf_token:
+            raise RuntimeError("HF_TOKEN not found in Space secrets")
+
+        self.client = InferenceClient(token=hf_token)
+
+        # ✅ SAFE MODELS (chat-only)
+        self.model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+        # Alternative:
+        # self.model = "Qwen/Qwen2.5-7B-Instruct"
+
+        print(f"✅ Model loaded: {self.model}")
+
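A quick way to check the token wiring before relying on __init__, as a standalone sketch (the three variable names are the ones the constructor accepts; the settings path comes from the setup notes removed elsewhere in this diff):

import os

token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or os.getenv("HF_API_TOKEN")
if not token:
    raise SystemExit("No token found: add HF_TOKEN under Settings → Variables and secrets, then restart the Space.")
print(f"Token found ({len(token)} characters)")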
     def __call__(self, question: str) -> str:
+        print(f"\nQ: {question[:120]}")
+
         try:
+            answer = self._chat_answer(question)
+            print(f"A: {answer}")
             return answer
         except Exception as e:
+            print(f"❌ Agent error: {e}")
+            return "Unable to determine answer"
+
+    def _chat_answer(self, question: str) -> str:
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    "You are an expert GAIA benchmark solver.\n"
+                    "Answer EXACTLY what is asked.\n"
+                    "Return ONLY the final answer.\n"
+                    "No explanations, no prefixes, no formatting."
+                )
+            },
+            {
+                "role": "user",
+                "content": question
+            }
+        ]

+        response = self.client.chat_completion(
+            model=self.model,
+            messages=messages,
+            max_tokens=256,
+            temperature=0.0,
+        )
+
+        if not response or not response.choices:
+            return "Unable to determine answer"
+
+        raw = response.choices[0].message.content.strip()
+        return self._clean_answer(raw)

     def _clean_answer(self, text: str) -> str:
         """
+        GAIA-safe cleaning: minimal, no hallucinated trimming.
         """
         text = text.strip()
+
+        # Remove common junk if model disobeys
+        bad_prefixes = [
+            "answer:",
+            "final answer:",
+            "the answer is",
+            "result:"
         ]
+
+        for p in bad_prefixes:
+            if text.lower().startswith(p):
+                text = text[len(p):].strip()
+
+        # If multi-line, keep first meaningful line
+        if "\n" in text:
+            text = text.split("\n")[0].strip()
+
+        # GAIA prefers concise
+        if len(text.split()) > 12:
+            # keep last sentence
+            parts = re.split(r"[.!?]", text)
+            text = parts[-2].strip() if len(parts) > 1 else parts[0].strip()
+
         return text
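A quick behavioral check of the cleaning rules above (a sketch: __new__ bypasses __init__ so no token or client is needed, and the inputs are made up):

from app import GAIAAgent  # assumes this Space's app.py is importable

agent = GAIAAgent.__new__(GAIAAgent)  # skip __init__; _clean_answer touches no client state
print(agent._clean_answer("Final answer: Paris"))       # -> "Paris"
print(agent._clean_answer("The answer is Paris"))       # -> "Paris"
print(agent._clean_answer("42\nBecause 6 * 7 = 42."))   # -> "42" (keeps the first line)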


+# =========================
+# RUN + SUBMIT
+# =========================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
+
+    if not profile:
+        return "Please login with Hugging Face.", None
+
+    username = profile.username
+    print(f"👤 User: {username}")
+
+    questions_url = f"{DEFAULT_API_URL}/questions"
+    submit_url = f"{DEFAULT_API_URL}/submit"
+
+    agent = GAIAAgent()
+
+    # Fetch questions
+    questions = requests.get(questions_url, timeout=15).json()
+
     answers_payload = []
+    results_log = []
+
+    for idx, item in enumerate(questions):
+        task_id = item["task_id"]
+        question = item["question"]
+
+        print(f"\n[{idx+1}/{len(questions)}] {task_id}")
+        answer = agent(question)
+
+        answers_payload.append({
+            "task_id": task_id,
+            "submitted_answer": answer
+        })
+
+        results_log.append({
+            "Task ID": task_id,
+            "Answer": answer
+        })
+
+    submission = {
+        "username": username,
+        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
         "answers": answers_payload
     }

+    response = requests.post(submit_url, json=submission, timeout=60)
+    result = response.json()
+
+    status = (
+        f"🎉 Submission Successful\n\n"
+        f"Score: {result.get('score')}%\n"
+        f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}"
+    )
+
+    return status, pd.DataFrame(results_log)
+
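For reference, the request and response shapes implied by the code above (the field names come from the diff; the concrete values are illustrative only):

# GET  {DEFAULT_API_URL}/questions  ->  [{"task_id": "...", "question": "..."}, ...]
# POST {DEFAULT_API_URL}/submit     ->  JSON read back via result.get("score"),
#                                       result.get("correct_count"), result.get("total_attempted")
example_submission = {
    "username": "your-hf-username",                                  # illustrative
    "agent_code": "https://huggingface.co/spaces/<owner>/<space>",   # illustrative
    "answers": [
        {"task_id": "abc123", "submitted_answer": "Paris"},          # illustrative
    ],
}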

+# =========================
+# GRADIO UI
+# =========================
+with gr.Blocks(title="GAIA Agent") as demo:
+    gr.Markdown("# 🤗 GAIA Benchmark Agent (Fixed)")

     gr.Markdown(
         """
+        ✅ Chat-only
+        ✅ nscale-safe
+        ✅ GAIA-optimized
+
+        **Steps**
+        1. Add `HF_TOKEN` to Space secrets
+        2. Login with Hugging Face
+        3. Click Run
         """
     )

     gr.LoginButton()
+    run_btn = gr.Button("🚀 Run Evaluation", variant="primary")
+
+    status = gr.Textbox(label="Status", lines=6)
+    table = gr.DataFrame(label="Results")
+
+    run_btn.click(run_and_submit_all, outputs=[status, table])


 if __name__ == "__main__":
+    demo.launch(debug=True)
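Outside the Space, a minimal local smoke test of the new agent could look like this (a sketch: it assumes the file above is saved as app.py with its requirements installed and HF_TOKEN exported; the question and expected output are illustrative):

from app import GAIAAgent

agent = GAIAAgent()
print(agent("What is 17 * 23?"))  # expects a bare answer such as "391"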