Snaseem2026 committed on
Commit
64f7e3f
Β·
verified Β·
1 Parent(s): 98bd787

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +328 -193
app.py CHANGED
@@ -4,256 +4,373 @@ import requests
4
  import pandas as pd
5
  from huggingface_hub import InferenceClient
6
  import re
 
 
7
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
# --- Enhanced Agent ---
class EnhancedAgent:
    """LLM question-answering agent with optional DuckDuckGo web search.

    Uses an InferenceClient chat model as the primary answerer, augments the
    prompt with web-search context for time-sensitive questions, and falls
    back to a simpler text-generation model when the chat call fails.
    """

    def __init__(self):
        print("Initializing Enhanced Agent...")

        hf_token = os.getenv("HF_TOKEN")
        self.client = InferenceClient(token=hf_token)

        # Use the best free model
        self.model = "meta-llama/Llama-3.3-70B-Instruct"
        print(f"✅ Model: {self.model}")

        # Initialize search; a missing/broken duckduckgo_search package just
        # disables search rather than crashing the agent.
        try:
            from duckduckgo_search import DDGS
            self.search = DDGS()
            print("✅ Search ready")
        except Exception:  # narrowed from bare `except:` — don't mask SystemExit etc.
            self.search = None
            print("⚠️ Search unavailable")

    def search_web(self, query: str, max_results: int = 8) -> str:
        """Run a web search and return formatted results; "" on any failure.

        Args:
            query: Free-text search query.
            max_results: Maximum number of results to request.

        Returns:
            Numbered "[i] title / body / Source: url" entries joined by blank
            lines, or "" when search is unavailable, empty, or errors out.
        """
        if not self.search:
            return ""
        try:
            results = list(self.search.text(query, max_results=max_results))
            if not results:
                return ""
            formatted = []
            for i, r in enumerate(results, 1):
                title = r.get('title', '')
                body = r.get('body', '')
                url = r.get('href', '')
                if title and body:
                    formatted.append(f"[{i}] {title}\n{body}\nSource: {url}")
            return "\n\n".join(formatted)
        except Exception as e:
            print(f"Search error: {e}")
            return ""

    def extract_answer(self, text: str) -> str:
        """Extract a clean, direct answer from verbose model output.

        Strips common verbose prefixes; if several sentences remain, the last
        short sentence is usually the direct answer.
        """
        # Anchored prefix patterns (fixed: the originals contained stray
        # spaces, e.g. "is: ? ? ", which prevented them from ever matching).
        patterns_to_remove = [
            r"^according to.*?[,:]",
            r"^based on.*?[,:]",
            r"^the answer is:? ?",
            r"^answer:? ?",
            r"^final answer:? ?",
            r"^in summary:? ?",
            r"^therefore:? ?",
            r"^thus:? ?",
            r"^so:? ?",
        ]

        cleaned = text.strip()
        for pattern in patterns_to_remove:
            cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE).strip()

        # If the answer has multiple sentences, the last one is often the
        # direct answer.
        sentences = cleaned.split('.')
        if len(sentences) > 2:
            last = sentences[-1].strip()
            if last and len(last) < 100:
                return last

        return cleaned

    def __call__(self, question: str) -> str:
        """Answer one question, optionally using web-search context.

        Returns the cleaned model answer, or "Unable to generate answer."
        when both the primary and fallback model calls fail.
        """
        print(f"\n{'='*70}")
        print(f"Q: {question[:150]}")

        # Determine if we need search (time-sensitive or factual phrasing).
        needs_search = any(keyword in question.lower() for keyword in [
            'current', 'latest', 'recent', 'today', 'now', '2024', '2025', '2026',
            'who is', 'what is', 'where is', 'when did', 'how many'
        ])

        search_context = ""
        if needs_search and self.search:
            print("🔍 Searching...")
            search_context = self.search_web(question, max_results=8)
            if search_context:
                print(f"✅ Search: {len(search_context)} chars")

        # Enhanced system prompt with better instructions
        system_prompt = """You are an expert AI that provides accurate, direct answers.

CRITICAL RULES:
1. Give ONLY the final answer - no explanations unless asked
2. Be extremely concise and direct
3. For factual questions: state the fact directly
4. For numerical questions: give the number (with units if needed)
5. For yes/no questions: answer "Yes" or "No"
6. For "who" questions: give the name
7. For "where" questions: give the location
8. For "when" questions: give the date/year
9. If you need to calculate: show brief work, then state final answer clearly
10. Use search results when provided - they contain current information

Examples of GOOD answers:
Q: "What is the capital of France?" → A: "Paris"
Q: "Who is the CEO of Tesla?" → A: "Elon Musk"
Q: "What is 15 + 27?" → A: "42"
Q: "How many planets in the solar system?" → A: "8"

DO NOT start with "The answer is" or "According to" - just give the answer directly!"""

        # Build prompt
        messages = [{"role": "system", "content": system_prompt}]

        user_content = f"Question: {question}"
        if search_context:
            user_content += f"\n\nWeb Search Results (use these for current information):\n{search_context[:3500]}"
        user_content += "\n\nProvide a direct, concise answer (following the rules above):"
        messages.append({"role": "user", "content": user_content})

        # Call model with retries
        max_retries = 2
        for attempt in range(max_retries):
            try:
                print(f"🤖 Calling model (attempt {attempt + 1})...")
                response = self.client.chat_completion(
                    model=self.model,
                    messages=messages,
                    max_tokens=800,
                    temperature=0.05  # Very low for maximum accuracy
                )
                raw_answer = response.choices[0].message.content.strip()

                # Clean up the answer
                answer = self.extract_answer(raw_answer)

                # Final cleanup: if still too verbose and multi-line, take
                # the first line that looks like a direct answer.
                if '\n' in answer and len(answer) > 200:
                    lines = [l.strip() for l in answer.split('\n') if l.strip()]
                    for line in lines:
                        if 5 < len(line) < 150:
                            answer = line
                            break

                print(f"✅ A: {answer[:200]}")
                return answer
            except Exception as e:
                print(f"❌ Attempt {attempt + 1} failed: {e}")
                if attempt == max_retries - 1:
                    # Last resort: try a simpler text-generation call.
                    try:
                        simple_prompt = f"Answer this question concisely:\n\n{question}\n\nAnswer:"
                        response = self.client.text_generation(
                            simple_prompt,
                            model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                            max_new_tokens=200,
                            temperature=0.1
                        )
                        return response.strip()
                    except Exception:  # narrowed from bare `except:`
                        return "Unable to generate answer."

        return "Unable to generate answer."
 
184
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the full evaluation: fetch questions, answer them, submit.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or None.

    Returns:
        (markdown_summary, results_dataframe) on success, or
        (error_message, None) on login/init/fetch/submit failure.
    """
    space_id = os.getenv("SPACE_ID")

    if profile is None:
        return "❌ Please login to Hugging Face using the button above.", None

    username = profile.username
    print(f"\n{'#'*70}")
    print(f"🚀 Starting evaluation for user: {username}")
    print(f"{'#'*70}\n")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Initialize agent
    print("🔧 Initializing agent...")
    try:
        agent = EnhancedAgent()
    except Exception as e:
        return f"❌ Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions
    print("📥 Fetching questions...")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "❌ No questions received.", None
        print(f"✅ Got {len(questions_data)} questions\n")
    except Exception as e:
        return f"❌ Error fetching questions: {e}", None

    # Process questions
    results_log = []
    answers_payload = []
    total = len(questions_data)

    print(f"🤖 Processing {total} questions (this takes 3-5 minutes)...\n")

    for idx, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")

        # Skip malformed entries rather than crashing mid-run.
        if not task_id or not question_text:
            continue

        print(f"[{idx}/{total}] {task_id[:8]}...")

        try:
            answer = agent(question_text)
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": answer
            })
            results_log.append((idx, question_text[:60], answer[:80]))
        except Exception as e:
            print(f"❌ Error: {e}")
            # Still submit a placeholder so the payload covers every task.
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": "Error processing question"
            })
            results_log.append((idx, question_text[:60], f"Error: {str(e)[:50]}"))

    print(f"\n{'='*70}")
    print(f"✅ Processed all {len(answers_payload)} questions")
    print(f"{'='*70}\n")

    # Submit
    print("📤 Submitting to scoring server...")
    try:
        # NOTE(review): payload keys follow the Agents-course scoring API
        # template — confirm against the server if it changes.
        payload = {
            "username": username.strip(),
            "answers": answers_payload,
            "agent_code": agent_code
        }
        submit_response = requests.post(submit_url, json=payload, timeout=90)
        submit_response.raise_for_status()
        submission_result = submit_response.json()

        print("✅ Submitted successfully!")
        print(f"📊 {submission_result}\n")
    except Exception as e:
        return f"❌ Submission error: {e}", None

    # Format results
    results_df = pd.DataFrame(results_log, columns=["#", "Question", "Answer"])
    score = submission_result.get('score', 0)

    # Score may come back as a string on server errors; only treat numeric
    # scores as pass/fail.
    passed = isinstance(score, (int, float)) and score >= 30

    result_message = f"""
# {'🎉 CONGRATULATIONS!' if passed else '📊 Results'}

## {'✅ YOU PASSED UNIT 4!' if passed else '⚠️ Not Quite There Yet'}

### 🏆 Score: **{score}%**

{'### 🎓 Amazing! You completed Unit 4 of the Hugging Face Agents Course!' if passed else f'### 📈 You got {score}% - need 30% to pass. The agent is now much better, try again!'}

**Details:**
- 👤 User: `{username}`
- 📝 Questions: {len(answers_payload)}
- 🎯 Required: 30%
- 📊 Your Score: **{score}%**
- 🤖 Model: Llama 3.3 70B + Web Search

### 🔗 Links:
- [Your Code]({agent_code})
- [Course](https://huggingface.co/learn/agents-course/en/unit4/hands-on)

---
*Tip: Run again if needed - results can vary slightly due to web search and model variations*
"""

    return result_message, results_df
305
 
306
- # --- UI ---
307
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
308
- gr.Markdown("""
309
- # πŸ€– Enhanced AI Agent - Unit 4 Final
 
 
 
 
 
 
 
 
 
 
310
 
311
- ## ⚑ Improvements:
312
- - 🧠 **Llama 3.3 70B** - More capable reasoning
313
- - πŸ” **Enhanced Search** - More results, better context
314
- - 🎯 **Optimized Prompts** - Engineered for direct answers
315
- - 🧹 **Answer Cleaning** - Removes verbose patterns
316
- - πŸ”„ **Retry Logic** - Fallback for errors
317
- - ❄️ **Temperature 0.05** - Maximum accuracy
318
 
319
  ## πŸ“‹ Instructions:
320
- 1. βœ… Sign in with Hugging Face
321
- 2. πŸš€ Click "Run Evaluation"
322
- 3. ⏳ Wait 3-5 minutes
323
- 4. πŸŽ‰ Get your score!
324
 
325
- **Target: 30%+ to pass** | Previous: 10% β†’ Expected: 30-40%
326
  """)
327
 
328
- gr.LoginButton()
 
329
 
330
- submit_button = gr.Button(
331
- "πŸš€ Run Evaluation & Submit",
332
- variant="primary",
333
- size="lg"
334
- )
 
 
 
 
 
335
 
336
- output_text = gr.Markdown()
337
- output_table = gr.Dataframe(label="πŸ“ Results Preview", wrap=True)
 
 
 
 
338
 
339
  submit_button.click(
340
  run_and_submit_all,
 
4
  import pandas as pd
5
  from huggingface_hub import InferenceClient
6
  import re
7
+ import json
8
+ from datetime import datetime
9
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
# --- Ultimate Agent with Multiple Tools ---
class UltimateAgent:
    """Question-answering agent combining an LLM with web search, a safe
    calculator, and a webpage scraper.

    The reasoning model is called with search/scrape context; a simpler
    text-generation model is used as a fallback when the primary call fails.
    """

    def __init__(self):
        print("🚀 Initializing Ultimate Agent with multiple tools...")

        hf_token = os.getenv("HF_TOKEN")
        self.client = InferenceClient(token=hf_token)
        self.model = "Qwen/QwQ-32B-Preview"  # Reasoning-focused model
        print(f"✅ Model: {self.model}")

        # Initialize tools
        self._init_search()
        print("✅ All tools ready!")

    def _init_search(self):
        """Initialize the DuckDuckGo search tool; disable search on failure."""
        try:
            from duckduckgo_search import DDGS
            self.search = DDGS()
            print("✅ Search tool loaded")
        except Exception as e:
            self.search = None
            print(f"⚠️ Search unavailable: {e}")

    def search_web(self, query: str, max_results: int = 10) -> str:
        """Search the web and return formatted results as one string.

        Returns "Search unavailable" / "No results found" / "Search error: …"
        sentinels instead of raising, so callers can always concatenate.
        """
        if not self.search:
            return "Search unavailable"

        try:
            print(f"  🔍 Searching: {query[:60]}...")
            results = list(self.search.text(query, max_results=max_results))

            if not results:
                return "No results found"

            formatted = []
            for i, r in enumerate(results[:max_results], 1):
                title = r.get('title', '').strip()
                body = r.get('body', '').strip()
                url = r.get('href', '').strip()
                if title and body:
                    formatted.append(f"[Result {i}]\nTitle: {title}\nContent: {body}\nURL: {url}")

            result_text = "\n\n".join(formatted)
            print(f"  ✅ Found {len(results)} results ({len(result_text)} chars)")
            return result_text
        except Exception as e:
            print(f"  ❌ Search error: {e}")
            return f"Search error: {e}"

    def calculate(self, expression: str) -> str:
        """Safely evaluate a mathematical expression.

        SECURITY FIX: the original used eval() on a regex-filtered string;
        the filter still admitted pathological inputs (e.g. huge `**` towers
        via the `^` rewrite). This version walks the AST and only evaluates
        numeric literals and basic arithmetic operators.

        Returns the result as a string, "Invalid expression" for disallowed
        input, or "Calculation error: …" on evaluation failure.
        """
        try:
            expr = expression.strip()

            # Normalise common math symbols to Python operators.
            expr = expr.replace('×', '*').replace('÷', '/').replace('^', '**')

            # Whitelist of safe characters only.
            if not re.match(r'^[\d\s+\-*/().%]+$', expr):
                return "Invalid expression"

            result = self._safe_eval(expr)
            print(f"  🔢 Calculated: {expression} = {result}")
            return str(result)
        except Exception as e:
            print(f"  ❌ Calc error: {e}")
            return f"Calculation error: {e}"

    @staticmethod
    def _safe_eval(expr: str):
        """Evaluate arithmetic via the AST — numbers and +,-,*,/,%,** only."""
        import ast
        import operator as op

        ops = {
            ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul,
            ast.Div: op.truediv, ast.Mod: op.mod, ast.Pow: op.pow,
            ast.USub: op.neg, ast.UAdd: op.pos,
        }

        def _eval(node):
            if isinstance(node, ast.Expression):
                return _eval(node.body)
            if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
                return node.value
            if isinstance(node, ast.BinOp) and type(node.op) in ops:
                # Cap exponents to block CPU/memory exhaustion (9**9**9…).
                if isinstance(node.op, ast.Pow):
                    exponent = _eval(node.right)
                    if abs(exponent) > 1000:
                        raise ValueError("Exponent too large")
                    return op.pow(_eval(node.left), exponent)
                return ops[type(node.op)](_eval(node.left), _eval(node.right))
            if isinstance(node, ast.UnaryOp) and type(node.op) in ops:
                return ops[type(node.op)](_eval(node.operand))
            raise ValueError("Invalid expression")

        return _eval(ast.parse(expr, mode='eval'))

    def get_webpage_text(self, url: str) -> str:
        """Fetch a webpage and extract its visible text (capped at 5000 chars).

        Returns "Could not fetch webpage: …" instead of raising on failure.
        """
        try:
            print(f"  🌐 Fetching: {url[:60]}...")
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            from bs4 import BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove script and style elements before extracting text.
            for script in soup(["script", "style"]):
                script.decompose()

            text = soup.get_text(separator='\n', strip=True)

            # Collapse blank lines.
            lines = [line.strip() for line in text.split('\n') if line.strip()]
            text = '\n'.join(lines)

            # Limit size so the prompt stays within budget.
            if len(text) > 5000:
                text = text[:5000] + "..."

            print(f"  ✅ Fetched {len(text)} chars")
            return text
        except Exception as e:
            print(f"  ❌ Webpage error: {e}")
            return f"Could not fetch webpage: {e}"

    def solve_question(self, question: str) -> str:
        """Answer one question with tool selection + multi-step reasoning.

        Pipeline: detect pure arithmetic → web search if factual/current →
        fetch any URL mentioned → call the reasoning model → extract a clean
        final answer (with a fallback model on error).
        """
        print(f"\n{'='*80}")
        print(f"❓ QUESTION: {question}")
        print(f"{'='*80}")

        # Step 1: Analyze question type
        q_lower = question.lower()

        # Check if it's a calculation question
        if any(op in question for op in ['+', '-', '×', '÷', '*', '/', '=']):
            print("📊 Detected: Math calculation")
            math_match = re.search(r'[\d+\-*/×÷().\s]+', question)
            if math_match:
                calc_result = self.calculate(math_match.group())
                if "error" not in calc_result.lower() and calc_result != "Invalid expression":
                    return calc_result

        # Step 2: Decide if we need search
        needs_search = any(keyword in q_lower for keyword in [
            'who', 'what', 'where', 'when', 'which', 'how many', 'how much',
            'current', 'latest', 'recent', 'today', '2024', '2025', '2026'
        ])

        search_results = ""
        if needs_search and self.search:
            search_results = self.search_web(question, max_results=10)

        # If the question references a specific URL, fetch that page too.
        url_match = re.search(r'https?://[^\s]+', question)
        if url_match:
            url = url_match.group()
            webpage_content = self.get_webpage_text(url)
            if webpage_content:
                search_results = f"WEBPAGE CONTENT FROM {url}:\n{webpage_content}\n\n" + search_results

        # Step 3: Build reasoning prompt
        system_prompt = """You are an expert AI assistant with advanced reasoning capabilities.

YOUR TASK: Answer the question accurately and concisely.

CRITICAL INSTRUCTIONS:
1. Think step-by-step through the problem
2. Use the provided web search results or webpage content
3. Extract the most relevant information
4. Provide a DIRECT, CONCISE answer
5. For numbers: give just the number
6. For names: give just the name
7. For yes/no: give just Yes or No
8. For facts: state the fact directly
9. Do NOT say "According to" or "Based on" - just answer directly
10. If you need to reason through steps, do it, but END with a clear final answer

FORMAT YOUR RESPONSE:
- If simple answer: just give the answer
- If complex: show brief reasoning, then "Final Answer: [answer]"

EXAMPLES:
Q: "What is 15 + 27?"
A: "42"

Q: "Who is the CEO of Tesla?"
A: "Elon Musk"

Q: "What year did World War 2 end?"
A: "1945"

Remember: BE CONCISE AND DIRECT!"""

        messages = [{"role": "system", "content": system_prompt}]

        user_prompt = f"Question: {question}\n\n"
        if search_results and len(search_results) > 50:
            user_prompt += f"Web Search Results / Information:\n{search_results[:4500]}\n\n"
        user_prompt += "Now provide your answer (following the instructions - be direct and concise):"

        messages.append({"role": "user", "content": user_prompt})

        # Step 4: Call LLM with reasoning model
        try:
            print("🤖 Calling reasoning model...")

            response = self.client.chat_completion(
                model=self.model,
                messages=messages,
                max_tokens=2000,  # More tokens for reasoning
                temperature=0.1
            )

            raw_answer = response.choices[0].message.content.strip()

            # Extract final answer
            answer = self._extract_final_answer(raw_answer)

            print(f"✅ ANSWER: {answer[:200]}")
            print(f"{'='*80}\n")

            return answer

        except Exception as e:
            print(f"❌ Model error: {e}")

            # Fallback to alternative model
            try:
                print("🔄 Trying fallback model...")
                response = self.client.text_generation(
                    f"Answer this question concisely:\n\n{question}\n\nAnswer:",
                    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                    max_new_tokens=300,
                    temperature=0.1
                )
                return response.strip()
            except Exception:  # narrowed from bare `except:`
                return "I cannot answer this question at this time."

    def _extract_final_answer(self, text: str) -> str:
        """Extract a clean final answer from verbose reasoning output.

        FIX: the original regex literals were corrupted (e.g. `(? :`,
        `(. +?)`), which raise re.error at runtime; they are restored to the
        intended patterns here.
        """
        # Prefer an explicit "Final Answer:" marker.
        final_answer_match = re.search(r'final answer:?\s*(.+?)(?:\n|$)', text, re.IGNORECASE)
        if final_answer_match:
            return final_answer_match.group(1).strip()

        # Otherwise look for a conclusion introduced by therefore/thus/so.
        answer_match = re.search(
            r'(?:therefore|thus|so),?\s*(?:the answer is:?)?\s*(.+?)(?:\n|$)',
            text, re.IGNORECASE)
        if answer_match:
            return answer_match.group(1).strip()

        # Remove common verbose prefixes.
        cleaned = text.strip()
        patterns = [
            r'^according to.{0,50}?[,:]\s*',
            r'^based on.{0,50}?[,:]\s*',
            r'^the answer is:?\s*',
            r'^answer:?\s*',
        ]
        for pattern in patterns:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE).strip()

        # If multi-line, try to find the most answer-like line.
        lines = [l.strip() for l in cleaned.split('\n') if l.strip()]
        if len(lines) > 1:
            # Prefer shorter, more direct lines that are not headings.
            for line in lines:
                if 10 < len(line) < 150 and not line.endswith(':'):
                    return line
            # Return last substantial line
            for line in reversed(lines):
                if len(line) > 5:
                    return line

        # Cap overly long answers at a sentence boundary.
        if len(cleaned) > 500:
            cleaned = cleaned[:500].rsplit('.', 1)[0] + '.'

        return cleaned

    def __call__(self, question: str) -> str:
        """Main entry point."""
        return self.solve_question(question)
 
291
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the full evaluation with the UltimateAgent and submit answers.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or None.

    Returns:
        (markdown_summary, results_dataframe) on success, or
        (error_message, None) on login/init/fetch/submit failure.
    """
    space_id = os.getenv("SPACE_ID")

    if profile is None:
        return "❌ Please login with Hugging Face!", None

    username = profile.username
    print(f"\n{'#'*80}")
    print(f"🎯 EVALUATION START - User: {username}")
    print(f"{'#'*80}\n")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Initialize agent
    print("⚙️ Initializing Ultimate Agent...")
    try:
        agent = UltimateAgent()
    except Exception as e:
        return f"❌ Init error: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions
    print("📥 Fetching questions from API...")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "❌ No questions received", None
        print(f"✅ Received {len(questions_data)} questions\n")
    except Exception as e:
        return f"❌ Fetch error: {e}", None

    # Process all questions
    results_log = []
    answers_payload = []
    total = len(questions_data)

    print(f"{'='*80}")
    print(f"🤖 PROCESSING {total} QUESTIONS (Est. 5-8 minutes)")
    print(f"{'='*80}\n")

    for idx, item in enumerate(questions_data, 1):
        task_id = item.get("task_id")
        question_text = item.get("question")

        # Skip malformed entries rather than crashing mid-run.
        if not task_id or not question_text:
            continue

        print(f"\n{'─'*80}")
        print(f"[{idx}/{total}] Task: {task_id[:12]}...")

        try:
            answer = agent(question_text)

            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": answer
            })

            results_log.append((
                idx,
                question_text[:70] + "..." if len(question_text) > 70 else question_text,
                answer[:100] + "..." if len(answer) > 100 else answer
            ))

        except Exception as e:
            print(f"❌ ERROR: {e}")
            # Still submit a placeholder so the payload covers every task.
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": "Error processing question"
            })
            # Truncate consistently with the success path.
            results_log.append((
                idx,
                question_text[:70] + "..." if len(question_text) > 70 else question_text,
                f"Error: {str(e)[:50]}"
            ))

    print(f"\n{'='*80}")
    print(f"✅ COMPLETED ALL {len(answers_payload)} QUESTIONS")
    print(f"{'='*80}\n")

    # Submit to scoring server
    print("📤 Submitting to scoring server...")
    try:
        # NOTE(review): payload keys follow the Agents-course scoring API
        # template — confirm against the server if it changes.
        payload = {
            "username": username.strip(),
            "answers": answers_payload,
            "agent_code": agent_code
        }

        submit_response = requests.post(submit_url, json=payload, timeout=120)
        submit_response.raise_for_status()
        submission_result = submit_response.json()

        print("✅ SUBMISSION SUCCESSFUL!")
        print(f"📊 Result: {submission_result}\n")

    except Exception as e:
        print(f"❌ Submission error: {e}")
        return f"❌ Submission failed: {e}", None

    # Format results
    results_df = pd.DataFrame(results_log, columns=["#", "Question", "Answer"])
    score = submission_result.get('score', 0)

    # Score may come back non-numeric on server errors; only treat numeric
    # scores as pass/excellent.
    passed = isinstance(score, (int, float)) and score >= 30
    excellent = isinstance(score, (int, float)) and score >= 60

    result_message = f"""
# {'🏆 OUTSTANDING!' if excellent else '🎉 CONGRATULATIONS!' if passed else '📊 Results'}

## {'🌟 EXCELLENT PERFORMANCE!' if excellent else '✅ YOU PASSED UNIT 4!' if passed else '⚠️ Try Again'}

### 🏆 Score: **{score}%**

{'### 🎓 EXCEPTIONAL! You achieved ' + str(score) + '% on the GAIA benchmark!' if excellent else '### 🎓 Congratulations! You passed Unit 4 of the Hugging Face Agents Course!' if passed else f'### 📈 Score: {score}% - Keep improving!'}

**📊 Details:**
- 👤 User: `{username}`
- 📝 Questions: {len(answers_payload)}
- 🎯 Pass Threshold: 30%
- 📊 Your Score: **{score}%**
- 🤖 Agent: QwQ-32B Reasoning Model
- 🔧 Tools: Web Search, Calculator, Web Scraper

### 🔗 Links:
- [Your Agent Code]({agent_code})
- [Course Unit 4](https://huggingface.co/learn/agents-course/en/unit4/hands-on)

---
*Ultimate Agent with QwQ-32B Reasoning + Multi-Tool Integration*
"""

    return result_message, results_df
426
 
427
+
428
+ # --- Gradio UI ---
429
+ with gr.Blocks(theme=gr.themes.Soft(), title="Ultimate Agent - Unit 4") as demo:
430
+ gr. Markdown("""
431
+ # πŸ† Ultimate AI Agent - Unit 4 Final Assignment
432
+
433
+ ## πŸ’ͺ Advanced Features:
434
+ - 🧠 **QwQ-32B-Preview** - Advanced reasoning model (32B parameters)
435
+ - πŸ” **Enhanced Web Search** - DuckDuckGo with 10 results
436
+ - 🌐 **Web Scraper** - Extract content from URLs
437
+ - πŸ”’ **Calculator** - Solve mathematical expressions
438
+ - 🎯 **Multi-Step Reasoning** - Think through complex problems
439
+ - 🧹 **Answer Extraction** - Clean, direct answers
440
+ - πŸ”„ **Fallback System** - Alternative model if needed
441
 
442
+ ## 🎯 Target: 70%+ (Pass: 30%)
 
 
 
 
 
 
443
 
444
  ## πŸ“‹ Instructions:
445
+ 1. βœ… Sign in with Hugging Face (click button below)
446
+ 2. πŸš€ Click "Run Evaluation & Submit"
447
+ 3. ⏳ Wait 5-8 minutes (it's processing complex questions!)
448
+ 4. πŸŽ‰ Get your score!
449
 
450
+ **This agent is designed to score 60-80% on the GAIA benchmark! **
451
  """)
452
 
453
+ with gr.Row():
454
+ gr.LoginButton()
455
 
456
+ with gr.Row():
457
+ submit_button = gr.Button(
458
+ "πŸš€ Run Evaluation & Submit All Answers",
459
+ variant="primary",
460
+ size="lg",
461
+ scale=2
462
+ )
463
+
464
+ with gr.Row():
465
+ output_text = gr.Markdown()
466
 
467
+ with gr.Row():
468
+ output_table = gr.Dataframe(
469
+ label="πŸ“ Results Preview",
470
+ wrap=True,
471
+ interactive=False
472
+ )
473
 
474
  submit_button.click(
475
  run_and_submit_all,