Final_Assignment_Template

Runtime error

App Files Files Community

LamiaYT commited on Jun 28, 2025

Commit

d66e9b7

1 Parent(s): 19b7914

fixing ver3

Browse files

Files changed (1) hide show

app.py +201 -231

app.py CHANGED Viewed

@@ -6,343 +6,313 @@ import re
 import numexpr
 import pandas as pd
 import math
-import pdfminer
-from duckduckgo_search import DDGS
 from pdfminer.high_level import extract_text
 from bs4 import BeautifulSoup
-import html2text
-from typing import Dict, Any, List, Tuple, Callable, Optional
 from dotenv import load_dotenv
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 import torch
 import time
 import gc
-import warnings
-# Suppress warnings
-warnings.filterwarnings("ignore")
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # --- Load Environment Variables ---
 load_dotenv()
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
-# --- Constants (ULTRA FAST MODE) ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-MAX_STEPS = 5 # Reduced to 3
-MAX_TOKENS = 100  # Very short responses
 MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
-TIMEOUT_PER_QUESTION = 20  # 15 seconds max
-MAX_CONTEXT = 1024  # Very short context
-# --- Configure Environment ---
-os.environ["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
-os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
-os.environ["BITSANDBYTES_NOWELCOME"] = "1"
-print("Loading model (ULTRA FAST mode)...")
 start_time = time.time()
-# Minimal model loading
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
     torch_dtype=torch.float32,
-    device_map="cpu",
-    low_cpu_mem_usage=True,
-    use_cache=False
 )
 tokenizer = AutoTokenizer.from_pretrained(
-    MODEL_NAME,
     use_fast=True,
-    trust_remote_code=True,
-    padding_side="left"
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
-# Pre-compile generation config
-GENERATION_CONFIG = GenerationConfig(
-    max_new_tokens=MAX_TOKENS,
-    temperature=0.3,
-    do_sample=True,
-    pad_token_id=tokenizer.pad_token_id,
-    eos_token_id=tokenizer.eos_token_id,
-    use_cache=False,
-    repetition_penalty=1.1
-)
-load_time = time.time() - start_time
-print(f"Model loaded in {load_time:.2f} seconds")
-# --- Lightning Fast Tools ---
 def web_search(query: str) -> str:
-    """Ultra-fast web search"""
     try:
         if SERPER_API_KEY:
-            params = {'q': query[:100], 'num': 1}  # Single result
-            headers = {'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'}
             response = requests.post(
                 'https://google.serper.dev/search',
                 headers=headers,
                 json=params,
-                timeout=3
             )
             results = response.json()
-            if 'organic' in results and results['organic']:
-                return f"{results['organic'][0]['title']}: {results['organic'][0]['snippet'][:200]}"
-            return "No results"
         else:
             with DDGS() as ddgs:
-                for result in ddgs.text(query, max_results=1):
-                    return f"{result['title']}: {result['body'][:200]}"
-            return "No results"
-    except:
-        return "Search failed"
 def calculator(expression: str) -> str:
-    """Lightning calculator"""
     try:
-        clean_expr = re.sub(r'[^\d+\-*/().\s]', '', str(expression))
-        if not clean_expr.strip():
-            return "Invalid expression"
-        result = eval(clean_expr)  # Simple eval for speed
         return str(float(result))
-    except:
-        return "Calc error"
 def read_pdf(file_path: str) -> str:
-    """Fast PDF reader"""
     try:
         text = extract_text(file_path)
-        return text[:500] if text else "No PDF text"
-    except:
-        return "PDF error"
 def read_webpage(url: str) -> str:
-    """Fast webpage reader"""
     try:
-        response = requests.get(url, timeout=3, headers={'User-Agent': 'Bot'})
         soup = BeautifulSoup(response.text, 'html.parser')
-        text = soup.get_text(separator=' ', strip=True)
-        return text[:500] if text else "No webpage text"
-    except:
-        return "Webpage error"
 TOOLS = {
     "web_search": web_search,
-    "calculator": calculator,
     "read_pdf": read_pdf,
     "read_webpage": read_webpage
 }
-# --- Ultra Fast Agent ---
-class FastGAIA_Agent:
     def __init__(self):
         self.tools = TOOLS
-        self.prompt_template = (
-            "<|system|>You solve GAIA questions fast. Tools: web_search, calculator, read_pdf, read_webpage.\n"
-            "Format: ```json\n{\"tool\": \"name\", \"args\": {\"key\": \"value\"}}```\n"
-            "Always end with: Final Answer: [answer]<|end|>\n"
-            "<|user|>{history}<|end|>\n<|assistant|>"
-        )
     def __call__(self, question: str) -> str:
         start_time = time.time()
         try:
-            history = f"Question: {question}"
             for step in range(MAX_STEPS):
                 if time.time() - start_time > TIMEOUT_PER_QUESTION:
-                    return "TIMEOUT"
-                response = self._fast_generate(history)
-                # Quick final answer check
                 if "Final Answer:" in response:
-                    answer = response.split("Final Answer:")[-1].strip().split('\n')[0]
-                    return answer[:200]  # Limit answer length
-                # Quick tool parsing
-                tool_result = self._quick_tool_use(response)
-                if tool_result:
-                    history += f"\nAction: {tool_result}"
                 else:
-                    history += f"\nThought: {response[:100]}"
-                # Keep history short
-                if len(history) > 800:
-                    history = history[-800:]
-            return "No solution found"
         except Exception as e:
-            return f"Error: {str(e)[:50]}"
-    def _fast_generate(self, history: str) -> str:
-        try:
-            prompt = self.prompt_template.format(history=history)
-            # Fast tokenization
-            inputs = tokenizer(
-                prompt,
-                return_tensors="pt",
-                truncation=True,
-                max_length=MAX_CONTEXT,
-                padding=False
             )
-            # Fast generation
-            with torch.no_grad():
-                outputs = model.generate(
-                    inputs.input_ids,
-                    generation_config=GENERATION_CONFIG,
-                    attention_mask=inputs.attention_mask
-                )
-            # Fast decoding
-            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            response = response.split("<|assistant|>")[-1].strip()
-            # Immediate cleanup
-            del inputs, outputs
-            gc.collect()
-            return response
-        except Exception as e:
-            return f"Gen error: {str(e)}"
-    def _quick_tool_use(self, text: str) -> str:
         try:
-            # Quick JSON extraction
-            json_match = re.search(r'```json\s*({[^}]*})\s*```', text)
-            if not json_match:
-                return ""
-            tool_data = json.loads(json_match.group(1))
-            tool_name = tool_data.get("tool", "")
-            args = tool_data.get("args", {})
-            if tool_name in self.tools:
-                result = self.tools[tool_name](**args)
-                return f"Used {tool_name}: {str(result)[:150]}"
         except:
-            pass
-        return ""
-# --- Lightning Fast Runner ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
-        return "❌ Please login first", None
-    username = profile.username
-    # Quick setup
-    agent = FastGAIA_Agent()
-    api_url = DEFAULT_API_URL
-    space_id = os.getenv("SPACE_ID", "unknown")
-    print(f"🚀 ULTRA FAST mode - User: {username}")
-    # Fetch questions quickly
     try:
-        response = requests.get(f"{api_url}/questions", timeout=10)
-        questions = response.json()
-        print(f"📝 Got {len(questions)} questions")
     except Exception as e:
-        return f"❌ Failed to get questions: {e}", None
-    # Process at lightning speed
     results = []
     answers = []
-    start_time = time.time()
-    for i, item in enumerate(questions):
         task_id = item.get("task_id")
-        question = item.get("question", "")
-        if not task_id:
             continue
-        print(f"⚡ [{i+1}/{len(questions)}] {task_id[:8]}...")
-        try:
-            answer = agent(question)
-            answers.append({"task_id": task_id, "submitted_answer": answer})
-            results.append({
-                "ID": task_id[:8],
-                "Question": question[:60] + "...",
-                "Answer": answer[:80] + "..." if len(answer) > 80 else answer
-            })
-        except Exception as e:
-            error_ans = f"ERROR: {str(e)[:30]}"
-            answers.append({"task_id": task_id, "submitted_answer": error_ans})
-            results.append({
-                "ID": task_id[:8],
-                "Question": question[:60] + "...",
-                "Answer": error_ans
-            })
-        # Quick memory cleanup
-        if i % 5 == 0:
-            gc.collect()
-    total_time = time.time() - start_time
-    print(f"⏱️  Completed in {total_time:.1f}s ({total_time/len(questions):.1f}s per question)")
-    # Submit results
     try:
-        submission = {
-            "username": username,
-            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
-            "answers": answers
-        }
-        response = requests.post(f"{api_url}/submit", json=submission, timeout=30)
         result = response.json()
-        status = (
-            f"🎯 ULTRA FAST RESULTS\n"
-            f"👤 User: {result.get('username', username)}\n"
-            f"📊 Score: {result.get('score', 'N/A')}% "
-            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
-            f"⏱️  Time: {total_time:.1f}s ({total_time/len(questions):.1f}s/question)\n"
-            f"💬 {result.get('message', 'Completed!')}"
-        )
-        return status, pd.DataFrame(results)
     except Exception as e:
-        error_status = f"❌ Submission failed: {str(e)}\n⏱️  Processing time: {total_time:.1f}s"
-        return error_status, pd.DataFrame(results)
-# --- Ultra Simple UI ---
-with gr.Blocks(title="GAIA Agent - ULTRA FAST") as demo:
-    gr.Markdown("# ⚡ GAIA Agent - ULTRA FAST MODE")
-    gr.Markdown("**Speed settings:** 3 steps max • 64 tokens • 15s timeout • Lightning tools")
-    gr.LoginButton()
-    run_btn = gr.Button("🚀 RUN ULTRA FAST", variant="primary", size="lg")
-    status = gr.Textbox(label="📊 Results", lines=6, interactive=False)
-    table = gr.DataFrame(label="📋 Answers", interactive=False)
-    run_btn.click(run_and_submit_all, outputs=[status, table], show_progress=True)
 if __name__ == "__main__":
-    print("⚡ ULTRA FAST GAIA Agent Starting...")
-    print(f"⚙️  {MAX_STEPS} steps, {MAX_TOKENS} tokens, {TIMEOUT_PER_QUESTION}s timeout")
-    demo.launch(
-        share=True,  # Added share=True for public link
-        server_name="0.0.0.0",
-        server_port=7860,
-        debug=False,
-        show_error=True
-    )

 import numexpr
 import pandas as pd
 import math
 from pdfminer.high_level import extract_text
 from bs4 import BeautifulSoup
+from typing import Dict, Any, List, Tuple, Optional
 from dotenv import load_dotenv
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 import torch
 import time
 import gc
 # --- Load Environment Variables ---
 load_dotenv()
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
+# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+MAX_STEPS = 6  # Increased from 4
+MAX_TOKENS = 256  # Increased from 128
 MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
+TIMEOUT_PER_QUESTION = 45  # Increased from 30
+MAX_RESULT_LENGTH = 500  # For tool outputs
+# --- Model Loading ---
+print("Loading optimized model...")
 start_time = time.time()
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     trust_remote_code=True,
     torch_dtype=torch.float32,
+    device_map="auto",
+    low_cpu_mem_usage=True
 )
 tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_NAME,
     use_fast=True,
+    trust_remote_code=True
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
+print(f"Model loaded in {time.time() - start_time:.2f} seconds")
+# --- Enhanced Tools ---
 def web_search(query: str) -> str:
+    """Enhanced web search with better result parsing"""
     try:
         if SERPER_API_KEY:
+            params = {'q': query, 'num': 3, 'hl': 'en', 'gl': 'us'}
+            headers = {'X-API-KEY': SERPER_API_KEY}
             response = requests.post(
                 'https://google.serper.dev/search',
                 headers=headers,
                 json=params,
+                timeout=10
             )
             results = response.json()
+            if 'organic' in results:
+                output = []
+                for r in results['organic'][:3]:
+                    if 'title' in r and 'snippet' in r:
+                        output.append(f"{r['title']}: {r['snippet']}")
+                return "\n".join(output)[:MAX_RESULT_LENGTH]
+            return "No relevant results found"
         else:
             with DDGS() as ddgs:
+                results = [r for r in ddgs.text(query, max_results=3)]
+                return "\n".join([f"{r['title']}: {r['body']}" for r in results])[:MAX_RESULT_LENGTH]
+    except Exception as e:
+        return f"Search error: {str(e)}"
 def calculator(expression: str) -> str:
+    """More robust calculator with validation"""
     try:
+        # Clean and validate expression
+        expression = re.sub(r'[^\d+\-*/().^%,\s]', '', expression)
+        if not expression:
+            return "Invalid empty expression"
+        # Handle percentages and commas
+        expression = expression.replace('%', '/100').replace(',', '')
+        result = numexpr.evaluate(expression)
         return str(float(result))
+    except Exception as e:
+        return f"Calculation error: {str(e)}"
 def read_pdf(file_path: str) -> str:
+    """PDF reader with better text extraction"""
     try:
         text = extract_text(file_path)
+        if not text:
+            return "No readable text found in PDF"
+        # Clean and condense text
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text[:MAX_RESULT_LENGTH]
+    except Exception as e:
+        return f"PDF read error: {str(e)}"
 def read_webpage(url: str) -> str:
+    """Improved webpage reader with better content extraction"""
     try:
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        response = requests.get(url, timeout=10, headers=headers)
+        response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
+        # Remove unwanted elements
+        for element in soup(['script', 'style', 'nav', 'footer']):
+            element.decompose()
+        # Get text with better formatting
+        text = soup.get_text(separator='\n', strip=True)
+        text = re.sub(r'\n{3,}', '\n\n', text)
+        return text[:MAX_RESULT_LENGTH] if text else "No main content found"
+    except Exception as e:
+        return f"Webpage read error: {str(e)}"
 TOOLS = {
     "web_search": web_search,
+    "calculator": calculator,
     "read_pdf": read_pdf,
     "read_webpage": read_webpage
 }
+# --- Improved GAIA Agent ---
+class GAIA_Agent:
     def __init__(self):
         self.tools = TOOLS
+        self.system_prompt = """You are an advanced GAIA problem solver. Follow these steps:
+1. Analyze the question carefully
+2. Choose the most appropriate tool
+3. Process the results
+4. Provide a precise final answer
+Available Tools:
+- web_search: For general knowledge questions
+- calculator: For math problems
+- read_pdf: For PDF content extraction
+- read_webpage: For webpage content extraction
+Tool format: ```json
+{"tool": "tool_name", "args": {"arg1": value}}```
+Always end with: Final Answer: [your answer]"""
     def __call__(self, question: str) -> str:
         start_time = time.time()
+        history = [f"Question: {question}"]
         try:
             for step in range(MAX_STEPS):
                 if time.time() - start_time > TIMEOUT_PER_QUESTION:
+                    return "Timeout: Processing took too long"
+                prompt = self._build_prompt(history)
+                response = self._call_model(prompt)
                 if "Final Answer:" in response:
+                    answer = response.split("Final Answer:")[-1].strip()
+                    return answer[:500]  # Limit answer length
+                tool_call = self._parse_tool_call(response)
+                if tool_call:
+                    tool_name, args = tool_call
+                    observation = self._use_tool(tool_name, args)
+                    history.append(f"Tool Used: {tool_name}")
+                    history.append(f"Tool Result: {observation[:300]}...")  # Truncate long results
                 else:
+                    history.append(f"Analysis: {response}")
+                gc.collect()
+            return "Maximum steps reached without final answer"
         except Exception as e:
+            return f"Error: {str(e)}"
+    def _build_prompt(self, history: List[str]) -> str:
+        return f"<|system|>\n{self.system_prompt}<|end|>\n<|user|>\n" + "\n".join(history) + "<|end|>\n<|assistant|>"
+    def _call_model(self, prompt: str) -> str:
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            truncation=True,
+            max_length=3072,
+            padding=False
+        )
+        generation_config = GenerationConfig(
+            max_new_tokens=MAX_TOKENS,
+            temperature=0.3,
+            top_p=0.9,
+            do_sample=True,
+            pad_token_id=tokenizer.pad_token_id
+        )
+        with torch.no_grad():
+            outputs = model.generate(
+                inputs.input_ids,
+                generation_config=generation_config,
+                attention_mask=inputs.attention_mask
             )
+        return tokenizer.decode(outputs[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()
+    def _parse_tool_call(self, text: str) -> Optional[Tuple[str, Dict]]:
         try:
+            json_match = re.search(r'```json\s*({.+?})\s*```', text, re.DOTALL)
+            if json_match:
+                tool_call = json.loads(json_match.group(1))
+                if "tool" in tool_call and "args" in tool_call:
+                    return tool_call["tool"], tool_call["args"]
         except:
+            return None
+        return None
+    def _use_tool(self, tool_name: str, args: Dict) -> str:
+        if tool_name not in self.tools:
+            return f"Unknown tool: {tool_name}"
+        try:
+            # Special handling for URL-containing questions
+            if tool_name == "read_webpage" and "url" not in args:
+                if "args" in args and isinstance(args["args"], dict) and "url" in args["args"]:
+                    args = args["args"]
+                elif "http" in str(args):
+                    url = re.search(r'https?://[^\s]+', str(args)).group()
+                    args = {"url": url}
+            return str(self.tools[tool_name](**args))[:MAX_RESULT_LENGTH]
+        except Exception as e:
+            return f"Tool error: {str(e)}"
+# --- Evaluation Runner ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
+        return "Please login first", None
+    agent = GAIA_Agent()
+    questions_url = f"{DEFAULT_API_URL}/questions"
+    submit_url = f"{DEFAULT_API_URL}/submit"
     try:
+        response = requests.get(questions_url, timeout=15)
+        questions_data = response.json()
     except Exception as e:
+        return f"Failed to get questions: {str(e)}", None
     results = []
     answers = []
+    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
+        question = item.get("question")
+        if not task_id or not question:
             continue
+        print(f"Processing question {i+1}/{len(questions_data)}")
+        answer = agent(question)
+        answers.append({"task_id": task_id, "submitted_answer": answer})
+        results.append({
+            "Task ID": task_id,
+            "Question": question[:100] + "..." if len(question) > 100 else question,
+            "Answer": answer[:100] + "..." if len(answer) > 100 else answer
+        })
+    submission = {
+        "username": profile.username,
+        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}",
+        "answers": answers
+    }
     try:
+        response = requests.post(submit_url, json=submission, timeout=30)
         result = response.json()
+        return f"Submitted! Score: {result.get('score', 'N/A')}", pd.DataFrame(results)
     except Exception as e:
+        return f"Submission failed: {str(e)}", pd.DataFrame(results)
+# --- Gradio Interface ---
+with gr.Blocks(title="Enhanced GAIA Agent") as demo:
+    gr.Markdown("## 🚀 Enhanced GAIA Agent Evaluation")
+    gr.Markdown("""
+    Improved version with:
+    - Better tool utilization
+    - Increased step/token limits
+    - Enhanced error handling
+    """)
+    with gr.Row():
+        gr.LoginButton()
+        run_btn = gr.Button("Run Evaluation", variant="primary")
+    output_status = gr.Textbox(label="Status")
+    results_table = gr.DataFrame(label="Results")
+    run_btn.click(
+        run_and_submit_all,
+        outputs=[output_status, results_table]
+    )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)