Spaces:

Mehedi2
/

new_assignment

Sleeping

App Files Files Community

Mehedi2 committed on Sep 28, 2025

Commit

352cf41

verified ·

1 Parent(s): cceff53

Update app.py

Browse files

Files changed (1) hide show

app.py +216 -390

app.py CHANGED Viewed

@@ -2,10 +2,12 @@ import os
 import requests
 import json
 import gradio as gr
-from typing import Dict, Any, Optional
-# Set your OpenRouter API key as environment variable
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") or os.getenv("my_key")
 class OpenRouterLLM:
     def __init__(self, api_key: str, model: str = "deepseek/deepseek-v3.1-terminus"):
@@ -13,17 +15,17 @@ class OpenRouterLLM:
         self.model = model
         self.base_url = "https://openrouter.ai/api/v1/chat/completions"
-    def __call__(self, prompt: str, max_tokens: int = 1000, temperature: float = 0.3) -> str:
-        """Make API call to OpenRouter with DeepSeek V3.1 Terminus"""
         if not self.api_key or not self.api_key.startswith('sk-or-v1-'):
-            return "Error: Invalid OpenRouter API key. Please configure your API key."
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json",
             "HTTP-Referer": "https://huggingface.co/spaces/Mehedi2/Gaia-Test-Agent",
-            "X-Title": "AI Navigation Agent"
         }
         payload = {
@@ -31,7 +33,12 @@ class OpenRouterLLM:
             "messages": [
                 {
                     "role": "system",
-                    "content": "You are a helpful AI assistant. Answer questions clearly and accurately."
                 },
                 {
                     "role": "user",
@@ -51,432 +58,261 @@ class OpenRouterLLM:
                 timeout=30
             )
-            if response.status_code == 401:
-                return "Error: Invalid API key or unauthorized."
-            elif response.status_code == 402:
-                return "Error: Insufficient credits in OpenRouter account."
-            elif response.status_code == 429:
-                return "Error: Rate limit exceeded. Please wait and try again."
-            elif response.status_code != 200:
-                return f"Error: HTTP {response.status_code} - {response.text[:200]}"
             result = response.json()
             if "choices" in result and len(result["choices"]) > 0:
                 return result["choices"][0]["message"]["content"].strip()
             else:
-                return "Error: No response content received."
-        except requests.exceptions.Timeout:
-            return "Error: Request timeout. Please try again."
-        except requests.exceptions.RequestException as e:
-            return f"Error calling OpenRouter API: {str(e)}"
         except Exception as e:
             return f"Error: {str(e)}"
-def run_agent(prompt: str) -> str:
-    """
-    Main function for GAIA evaluation
-    Takes any text prompt and returns a response
-    """
-    try:
-        # Check if API key is available
-        if not OPENROUTER_API_KEY:
-            return "Error: No API key configured. Please set OPENROUTER_API_KEY environment variable."
-        # Initialize the LLM
-        llm = OpenRouterLLM(api_key=OPENROUTER_API_KEY, model="deepseek/deepseek-v3.1-terminus")
-        # Check if this is a navigation-related query
-        navigation_keywords = ['route', 'navigation', 'direction', 'coordinate', 'latitude', 'longitude', 'drive', 'travel']
-        if any(keyword in prompt.lower() for keyword in navigation_keywords):
-            # Try to extract coordinates or provide navigation guidance
             enhanced_prompt = f"""
-You are a navigation assistant. The user asked: "{prompt}"
-If they provided coordinates, help them with navigation. If not, ask for specific locations or coordinates.
-Provide helpful navigation-related information.
-"""
-        else:
-            # General AI assistant prompt
-            enhanced_prompt = f"""
-You are a helpful AI assistant. Please answer the following question accurately and thoroughly:
-{prompt}
-Provide a clear, factual response based on your knowledge.
-"""
-        # Get response from LLM
-        response = llm(enhanced_prompt, max_tokens=1500, temperature=0.3)
-        return response
-    except Exception as e:
-        return f"Error processing request: {str(e)}"
-def fetch_route_from_osrm(origin: str, destination: str) -> str:
-    """Fetch route from OSRM API"""
-    try:
-        # Validate coordinates
-        origin_parts = origin.split(',')
-        dest_parts = destination.split(',')
-        if len(origin_parts) != 2 or len(dest_parts) != 2:
-            return "Error: Coordinates must be in 'longitude,latitude' format"
-        # Parse coordinates
-        float(origin_parts[0]), float(origin_parts[1])
-        float(dest_parts[0]), float(dest_parts[1])
-    except (ValueError, IndexError):
-        return "Error: Invalid coordinate format"
-    url = f"http://router.project-osrm.org/route/v1/driving/{origin};{destination}"
-    params = {
-        "overview": "false",
-        "steps": "true",
-        "geometries": "geojson"
-    }
-    try:
-        response = requests.get(url, params=params, timeout=15)
-        response.raise_for_status()
-        data = response.json()
-        if not data.get("routes") or len(data["routes"]) == 0:
-            return "No route found between the specified locations."
-        route = data["routes"][0]
-        total_distance_km = route.get("distance", 0) / 1000
-        total_duration_min = route.get("duration", 0) / 60
-        # Process turn-by-turn instructions
-        instructions = []
-        step_number = 1
-        for leg in route["legs"]:
-            for step in leg["steps"]:
-                maneuver = step.get("maneuver", {})
-                step_type = maneuver.get("type", "continue")
-                modifier = maneuver.get("modifier", "")
-                road_name = step.get("name", "")
-                distance_m = step.get("distance", 0)
-                if distance_m < 10:
-                    continue
-                instruction = f"{step_number}. "
-                if step_type == "depart":
-                    direction = "Start your journey"
-                    if modifier:
-                        direction += f" heading {modifier}"
-                    if road_name:
-                        direction += f" on {road_name}"
-                elif step_type == "arrive":
-                    instruction += "You have arrived at your destination!"
-                    instructions.append(instruction)
-                    break
-                elif step_type == "turn":
-                    direction = f"Turn {modifier}" if modifier else "Turn"
-                    if road_name:
-                        direction += f" onto {road_name}"
-                elif step_type == "merge":
-                    direction = f"Merge {modifier}" if modifier else "Merge"
-                    if road_name:
-                        direction += f" onto {road_name}"
-                elif step_type == "continue":
-                    direction = "Continue straight"
-                    if road_name:
-                        direction += f" on {road_name}"
-                else:
-                    direction = f"{step_type.replace('_', ' ').title()}"
-                    if modifier:
-                        direction += f" {modifier}"
-                    if road_name:
-                        direction += f" on {road_name}"
-                if distance_m >= 100:
-                    if distance_m >= 1000:
-                        direction += f" for {distance_m/1000:.1f} km"
-                    else:
-                        direction += f" for {distance_m:.0f} meters"
-                instruction += direction
-                instructions.append(instruction)
-                step_number += 1
-        route_summary = f"""ROUTE SUMMARY
-Distance: {total_distance_km:.1f} km
-Estimated Time: {total_duration_min:.0f} minutes
-From: {origin} to {destination}
-TURN-BY-TURN DIRECTIONS:
-{chr(10).join(instructions)}
-Total Steps: {len(instructions)}
-"""
-        return route_summary.strip()
-    except Exception as e:
-        return f"Error fetching route: {str(e)}"
-def navigate_with_ai(origin_lat, origin_lon, dest_lat, dest_lon, progress=gr.Progress()):
-    """Main navigation function for Gradio interface"""
-    progress(0, desc="Starting navigation...")
-    # Validate inputs
-    try:
-        origin_lat = float(origin_lat)
-        origin_lon = float(origin_lon)
-        dest_lat = float(dest_lat)
-        dest_lon = float(dest_lon)
-    except (ValueError, TypeError):
-        return "Error: Please enter valid numeric coordinates."
-    # Check coordinate ranges
-    if not (-90 <= origin_lat <= 90) or not (-180 <= origin_lon <= 180):
-        return "Error: Origin coordinates out of valid range."
-    if not (-90 <= dest_lat <= 90) or not (-180 <= dest_lon <= 180):
-        return "Error: Destination coordinates out of valid range."
-    # Format coordinates
-    origin = f"{origin_lon},{origin_lat}"
-    destination = f"{dest_lon},{dest_lat}"
-    progress(0.3, desc="Fetching route data...")
-    # Get route from OSRM
-    raw_route = fetch_route_from_osrm(origin, destination)
-    if raw_route.startswith("Error"):
-        return raw_route
-    progress(0.7, desc="Generating AI summary...")
-    # Check if API key is available
-    if not OPENROUTER_API_KEY:
-        return f"""Warning: No API key configured. Showing raw route data:
-{raw_route}
-To get AI-enhanced summaries, please configure your OpenRouter API key in the Space settings."""
-    # Generate AI summary
-    llm = OpenRouterLLM(api_key=OPENROUTER_API_KEY, model="deepseek/deepseek-v3.1-terminus")
-    prompt = f"""
-Analyze this route information and create a helpful navigation summary:
-{raw_route}
-Please provide:
-1. A brief overview of the journey
-2. Simplified directions with key landmarks
-3. Any important notes about the route
-4. Travel tips if relevant
-Format your response to be clear and easy to follow.
-"""
-    progress(0.9, desc="Finalizing response...")
-    ai_summary = llm(prompt, max_tokens=1200, temperature=0.2)
     progress(1.0, desc="Complete!")
-    return ai_summary
-# Predefined location examples
-LOCATION_EXAMPLES = {
-    "Dhaka, Bangladesh": (23.8103, 90.4125),
-    "Chittagong, Bangladesh": (22.3569, 91.7832),
-    "London, UK": (51.5074, -0.1278),
-    "New York, USA": (40.7128, -74.0060),
-    "Paris, France": (48.8566, 2.3522),
-    "Tokyo, Japan": (35.6762, 139.6503),
-    "Sydney, Australia": (-33.8688, 151.2093)
-}
-def set_example_location(location_name, is_destination=False):
-    """Set example location coordinates"""
-    if location_name in LOCATION_EXAMPLES:
-        lat, lon = LOCATION_EXAMPLES[location_name]
-        return lat, lon
-    return None, None
 # Create Gradio interface
 def create_gradio_app():
-    with gr.Blocks(
-        title="AI Navigation Agent",
-        theme=gr.themes.Soft(),
-        css="""
-        .main-header {
-            text-align: center;
-            background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-            color: white;
-            padding: 20px;
-            border-radius: 10px;
-            margin-bottom: 20px;
-        }
-        """
-    ) as app:
         gr.HTML("""
-        <div class="main-header">
-            <h1>AI Navigation Agent</h1>
-            <p>Get AI-powered route planning with DeepSeek V3.1 Terminus</p>
         </div>
         """)
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("### Origin (Starting Point)")
-                with gr.Row():
-                    origin_lat = gr.Number(
-                        label="Latitude",
-                        placeholder="e.g., 23.8103",
-                        value=23.8103,
-                        precision=6
-                    )
-                    origin_lon = gr.Number(
-                        label="Longitude",
-                        placeholder="e.g., 90.4125",
-                        value=90.4125,
-                        precision=6
-                    )
-                origin_examples = gr.Dropdown(
-                    choices=list(LOCATION_EXAMPLES.keys()),
-                    label="Or choose a preset location",
-                    value=None
-                )
-            with gr.Column():
-                gr.Markdown("### Destination (End Point)")
-                with gr.Row():
-                    dest_lat = gr.Number(
-                        label="Latitude",
-                        placeholder="e.g., 22.3569",
-                        value=22.3569,
-                        precision=6
-                    )
-                    dest_lon = gr.Number(
-                        label="Longitude",
-                        placeholder="e.g., 91.7832",
-                        value=91.7832,
-                        precision=6
-                    )
-                dest_examples = gr.Dropdown(
-                    choices=list(LOCATION_EXAMPLES.keys()),
-                    label="Or choose a preset location",
-                    value=None
-                )
-        with gr.Row():
-            clear_btn = gr.Button("Clear", variant="secondary")
-            navigate_btn = gr.Button("Get Navigation", variant="primary", size="lg")
-        with gr.Row():
-            output = gr.Textbox(
-                label="Navigation Result",
-                lines=20,
-                placeholder="Your navigation instructions will appear here...",
-                show_copy_button=True
             )
-        # Add a simple chat interface for GAIA testing
-        with gr.Row():
-            gr.Markdown("### General AI Assistant (for GAIA evaluation)")
-        with gr.Row():
-            chat_input = gr.Textbox(
-                label="Ask any question",
-                placeholder="Type your question here...",
                 lines=3
             )
-        with gr.Row():
-            chat_btn = gr.Button("Ask AI", variant="primary")
-        with gr.Row():
-            chat_output = gr.Textbox(
-                label="AI Response",
-                lines=10,
-                placeholder="AI response will appear here...",
-                show_copy_button=True
             )
         gr.Markdown("""
         ### How to Use:
-        1. **Navigation**: Enter coordinates for route planning
-        2. **General Questions**: Use the chat interface below for any questions
-        3. **GAIA Testing**: The chat interface is used for GAIA evaluation
-        ### Coordinate Format:
-        - Latitude: -90 to 90 (North/South)
-        - Longitude: -180 to 180 (East/West)
-        - Example: Dhaka is at 23.8103, 90.4125
         """)
-        # Event handlers
-        def set_origin_example(location):
-            if location:
-                lat, lon = set_example_location(location)
-                return lat, lon
-            return gr.update(), gr.update()
-        def set_dest_example(location):
-            if location:
-                lat, lon = set_example_location(location)
-                return lat, lon
-            return gr.update(), gr.update()
-        def clear_all():
-            return "", "", "", "", None, None, ""
-        # Wire up events
-        origin_examples.change(
-            fn=set_origin_example,
-            inputs=[origin_examples],
-            outputs=[origin_lat, origin_lon]
-        )
-        dest_examples.change(
-            fn=set_dest_example,
-            inputs=[dest_examples],
-            outputs=[dest_lat, dest_lon]
-        )
-        navigate_btn.click(
-            fn=navigate_with_ai,
-            inputs=[origin_lat, origin_lon, dest_lat, dest_lon],
-            outputs=[output],
-            show_progress=True
-        )
-        clear_btn.click(
-            fn=clear_all,
-            outputs=[origin_lat, origin_lon, dest_lat, dest_lon, origin_examples, dest_examples, output]
-        )
-        # Chat interface for GAIA
-        chat_btn.click(
-            fn=run_agent,
-            inputs=[chat_input],
-            outputs=[chat_output]
-        )
     return app
@@ -484,17 +320,7 @@ def create_gradio_app():
 if __name__ == "__main__":
     app = create_gradio_app()
-    # Check if running on Hugging Face Spaces
     if os.getenv("SPACE_ID"):
-        # Running on HF Spaces
-        app.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            show_api=False
-        )
     else:
-        # Running locally
-        app.launch(
-            share=True,
-            show_api=False
-        )

 import requests
 import json
 import gradio as gr
+from typing import Dict, List, Any
+import time
# OpenRouter credential: read OPENROUTER_API_KEY first, then fall back to the
# `my_key` environment variable. Evaluates to None when neither is set.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY") or os.environ.get("my_key")
# Root URL that the GAIA question/submission endpoints are appended to.
GAIA_API_BASE = "https://huggingface.co/api/gaia"  # Replace with actual GAIA API URL
 class OpenRouterLLM:
     def __init__(self, api_key: str, model: str = "deepseek/deepseek-v3.1-terminus"):
         self.model = model
         self.base_url = "https://openrouter.ai/api/v1/chat/completions"
+    def __call__(self, prompt: str, max_tokens: int = 1000, temperature: float = 0.1) -> str:
+        """Make API call to OpenRouter"""
         if not self.api_key or not self.api_key.startswith('sk-or-v1-'):
+            return "Error: Invalid OpenRouter API key"
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json",
             "HTTP-Referer": "https://huggingface.co/spaces/Mehedi2/Gaia-Test-Agent",
+            "X-Title": "GAIA Test Agent"
         }
         payload = {
             "messages": [
                 {
                     "role": "system",
+                    "content": """You are a helpful AI assistant designed to answer questions accurately and concisely.
+For GAIA evaluation, provide EXACT answers without explanation unless asked.
+- For math questions, give just the number
+- For yes/no questions, give just "Yes" or "No"
+- For factual questions, give just the fact
+- Be precise and direct."""
                 },
                 {
                     "role": "user",
                 timeout=30
             )
+            if response.status_code != 200:
+                return f"API Error: {response.status_code}"
             result = response.json()
             if "choices" in result and len(result["choices"]) > 0:
                 return result["choices"][0]["message"]["content"].strip()
             else:
+                return "Error: No response content received"
         except Exception as e:
             return f"Error: {str(e)}"
class GAIAAgent:
    """Question-answering agent for GAIA evaluation.

    Sends prompts to an ``OpenRouterLLM`` and uses the HTTP endpoints under
    ``GAIA_API_BASE`` to fetch questions and submit answers.
    """

    # Lead-in labels stripped (case-insensitively) from the start of a response.
    _ANSWER_PREFIXES = (
        "Answer:", "The answer is:", "Response:", "Result:",
        "Final answer:", "Solution:", "A:", "Answer is:",
    )

    def __init__(self, api_key: str):
        """Create the agent and its LLM client from an OpenRouter API key."""
        self.llm = OpenRouterLLM(api_key=api_key)
        self.api_key = api_key

    def run_agent(self, prompt: str) -> str:
        """Answer a single question.

        Args:
            prompt: The question text.

        Returns:
            The cleaned model answer, or ``"Error: <message>"`` if the LLM call
            or the clean-up raised.
        """
        try:
            # Wrap the question with instructions that push for a bare answer.
            enhanced_prompt = f"""
Question: {prompt}
Analyze this question carefully and provide the exact answer. Do not include explanations, reasoning, or extra text unless specifically asked for reasoning.
Examples of good responses:
- Math question "What is 15 + 27?" → Answer: "42"
- Yes/No question "Is Paris the capital of France?" → Answer: "Yes"
- Factual question "What is the capital of Japan?" → Answer: "Tokyo"
Your answer:"""
            response = self.llm(enhanced_prompt, max_tokens=500, temperature=0.1)
            return self.clean_answer(response)
        except Exception as e:
            # UI boundary: report the failure as text instead of raising.
            return f"Error: {e}"

    def clean_answer(self, response: str) -> str:
        """Strip lead-in labels and wrapping double quotes from a response.

        Labels are removed repeatedly until none is left, so stacked lead-ins
        such as ``"Final answer: Answer: 42"`` reduce to ``"42"``.

        Args:
            response: Raw text returned by the LLM.

        Returns:
            The response with surrounding whitespace, known lead-in labels and
            one pair of enclosing double quotes removed.
        """
        cleaned = response.strip()
        removed_prefix = True
        while removed_prefix:
            removed_prefix = False
            lowered = cleaned.lower()
            for prefix in self._ANSWER_PREFIXES:
                if lowered.startswith(prefix.lower()):
                    cleaned = cleaned[len(prefix):].strip()
                    removed_prefix = True
                    break
        # Require two characters so a lone '"' is not treated as a quoted pair.
        if len(cleaned) >= 2 and cleaned.startswith('"') and cleaned.endswith('"'):
            cleaned = cleaned[1:-1]
        return cleaned

    def _fetch_json(self, path: str) -> Any:
        """GET ``GAIA_API_BASE/<path>`` and return the decoded JSON, or None on any failure."""
        try:
            response = requests.get(f"{GAIA_API_BASE}/{path}", timeout=30)
            if response.status_code != 200:
                return None
            return response.json()
        except Exception:
            # Best-effort fetch: network and decoding errors become "no data".
            # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit through.
            return None

    def get_questions(self) -> List[Dict]:
        """Get questions from the GAIA API.

        Returns:
            The decoded list of questions, or ``[]`` if the request failed or
            the payload was not a list.
        """
        data = self._fetch_json("questions")
        return data if isinstance(data, list) else []

    def get_random_question(self) -> Dict:
        """Get a random question from the GAIA API.

        Returns:
            The decoded question dict, or ``{}`` if the request failed or the
            payload was not a dict.
        """
        data = self._fetch_json("random-question")
        return data if isinstance(data, dict) else {}

    def submit_answers(self, username: str, agent_code: str, answers: List[Dict]) -> Dict:
        """Submit answers to GAIA for scoring.

        Args:
            username: Name sent as the ``username`` field.
            agent_code: URL sent as the ``agent_code`` field.
            answers: Answer records sent as the ``answers`` field.

        Returns:
            The decoded response dict on HTTP 200; otherwise a dict with an
            ``"error"`` key describing the failure.
        """
        try:
            payload = {
                "username": username,
                "agent_code": agent_code,
                "answers": answers
            }
            response = requests.post(
                f"{GAIA_API_BASE}/submit",
                json=payload,
                timeout=60
            )
            if response.status_code != 200:
                return {"error": f"Submission failed: {response.status_code}"}
            result = response.json()
            if not isinstance(result, dict):
                # Callers use `.get` on the result, so reject other JSON shapes.
                return {"error": "Submission error: unexpected response format"}
            return result
        except Exception as e:
            return {"error": f"Submission error: {e}"}
# Shared module-level agent, built once from the configured OpenRouter key.
agent = GAIAAgent(api_key=OPENROUTER_API_KEY)


def run_agent(prompt: str) -> str:
    """Answer `prompt` by delegating to the shared `agent` instance."""
    answer = agent.run_agent(prompt)
    return answer
def test_single_question():
    """Fetch one random GAIA question, answer it, and format both for display.

    Returns:
        A two-line string with the question and the agent's answer, or
        "Failed to get question" when the API returned nothing usable or the
        payload carries no question text.
    """
    question = agent.get_random_question()
    # Do not call the LLM without question text; this matches the
    # `if question_text:` guard used in run_full_evaluation.
    question_text = question.get("Question", "") if isinstance(question, dict) else ""
    if not question_text:
        return "Failed to get question"
    answer = run_agent(question_text)
    return f"Question: {question_text}\nAnswer: {answer}"
def run_full_evaluation(username: str, progress=gr.Progress()):
    """Answer every GAIA question and submit the results for scoring.

    Args:
        username: Hugging Face username; sent with the submission and used to
            build the agent-code URL.
        progress: Gradio progress callback, called with a fraction and a label.

    Returns:
        A status message: a validation or retrieval error, a submission
        failure, or the score summary.
    """
    # Preconditions, checked before any network call.
    if not username:
        return "Please provide your Hugging Face username"
    if not OPENROUTER_API_KEY:
        return "Please configure your OpenRouter API key"

    progress(0.1, desc="Getting questions...")
    questions = agent.get_questions()
    if not questions:
        return "Failed to retrieve questions from GAIA API"

    total = len(questions)
    progress(0.2, desc=f"Processing {total} questions...")

    answers = []
    for index, item in enumerate(questions):
        # The answering phase spans the 0.2 to 0.9 part of the progress bar.
        progress(0.2 + (0.7 * index / total), desc=f"Processing question {index+1}/{total}")
        task_id = item.get("task_id", "")
        text = item.get("Question", "")
        if text:
            answers.append({
                "task_id": task_id,
                "submitted_answer": run_agent(text),
            })
        # Pause between questions to stay under the API rate limit.
        time.sleep(0.5)

    progress(0.9, desc="Submitting answers...")
    agent_code = f"https://huggingface.co/spaces/{username}/Gaia-Test-Agent/tree/main"
    result = agent.submit_answers(username, agent_code, answers)
    progress(1.0, desc="Complete!")

    if "error" in result:
        return f"Submission failed: {result['error']}"
    score = result.get("score", 0)
    return f"Evaluation complete!\nScore: {score}%\nAnswers submitted: {len(answers)}\nCheck the leaderboard for your ranking!"
 # Create Gradio interface
 def create_gradio_app():
+    with gr.Blocks(title="GAIA Test Agent", theme=gr.themes.Soft()) as app:
         gr.HTML("""
+        <div style="text-align: center; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
+            <h1>GAIA Test Agent</h1>
+            <p>AI Agent for GAIA Benchmark Evaluation</p>
         </div>
         """)
+        with gr.Tab("Single Question Test"):
+            test_btn = gr.Button("Test Random Question", variant="primary")
+            test_output = gr.Textbox(
+                label="Test Result",
+                lines=10,
+                placeholder="Test results will appear here..."
+            )
+            test_btn.click(
+                fn=test_single_question,
+                outputs=[test_output]
+            )
+        with gr.Tab("Full Evaluation"):
+            gr.Markdown("### Run Full GAIA Evaluation")
+            username_input = gr.Textbox(
+                label="Hugging Face Username",
+                placeholder="Enter your HF username",
+                info="This will be used for the leaderboard"
+            )
+            eval_btn = gr.Button("Run Full Evaluation", variant="primary")
+            eval_output = gr.Textbox(
+                label="Evaluation Results",
+                lines=15,
+                placeholder="Evaluation results will appear here..."
+            )
+            eval_btn.click(
+                fn=run_full_evaluation,
+                inputs=[username_input],
+                outputs=[eval_output],
+                show_progress=True
             )
+        with gr.Tab("Manual Testing"):
+            gr.Markdown("### Test Individual Questions")
+            manual_input = gr.Textbox(
+                label="Enter Question",
+                placeholder="Type a question to test...",
                 lines=3
             )
+            manual_btn = gr.Button("Get Answer", variant="primary")
+            manual_output = gr.Textbox(
+                label="Answer",
+                lines=5,
+                placeholder="Answer will appear here..."
+            )
+            manual_btn.click(
+                fn=run_agent,
+                inputs=[manual_input],
+                outputs=[manual_output]
             )
         gr.Markdown("""
         ### How to Use:
+        1. **Single Question Test**: Test your agent with one random question from GAIA
+        2. **Full Evaluation**: Run the complete evaluation and submit to leaderboard
+        3. **Manual Testing**: Test your agent with custom questions
+        ### Requirements:
+        - Set your OpenRouter API key in Space secrets as `OPENROUTER_API_KEY`
+        - Keep your Space public for leaderboard verification
+        - Your HF username will appear on the leaderboard
         """)
     return app
 if __name__ == "__main__":
     app = create_gradio_app()
     if os.getenv("SPACE_ID"):
+        app.launch(server_name="0.0.0.0", server_port=7860, show_api=False)
     else:
+        app.launch(share=True, show_api=False)