Final_Assignment_Template

Sleeping

App Files Files Community

claudi47 committed 24 days ago

Commit

a765bf8

1 Parent(s): 8420ad2

Fix Groq auth and add support for GAIA file tasks

Browse files

Files changed (3) hide show

README.md +2 -1
app.py +160 -6
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -23,7 +23,7 @@ The app logs in with Hugging Face OAuth, downloads the GAIA evaluation questions
 Create a `.env` file with the secrets needed by the model provider and by Hugging Face Spaces:
 ```bash
-HF_TOKEN=your_token_here
 SPACE_ID=your-username/your-space-name
 ```
@@ -44,5 +44,6 @@ python app.py
 ## Notes
 - The app uses `https://agents-course-unit4-scoring.hf.space` as the scoring API.
 - The Gradio SDK version is pinned in this README frontmatter and dependencies are pinned in `requirements.txt`.
 - OAuth must be enabled on the Hugging Face Space for the login flow to work.

 Create a `.env` file with the secrets needed by the model provider and by Hugging Face Spaces:
 ```bash
+GROQ_API_KEY=your_groq_key_here
 SPACE_ID=your-username/your-space-name
 ```
 ## Notes
 - The app uses `https://agents-course-unit4-scoring.hf.space` as the scoring API.
+- Text answers use Groq `llama-3.3-70b-versatile`; audio files use Groq Whisper; image files use a Groq vision model.
 - The Gradio SDK version is pinned in this README frontmatter and dependencies are pinned in `requirements.txt`.
 - OAuth must be enabled on the Hugging Face Space for the login flow to work.

app.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import os
 import requests
 import pandas as pd
 import gradio as gr
@@ -18,6 +20,12 @@ load_dotenv()
 DEFAULT_API_URL = (
     "https://agents-course-unit4-scoring.hf.space"
 )
 # Format instructions appended to every question
 # so that the agent returns exact-match-friendly
@@ -89,6 +97,7 @@ class GaiaFileFetcherTool(Tool):
         if not fname:
             fname = f"{task_id}{ext}"
         path = os.path.join(
             _tmp.gettempdir(), fname
         )
@@ -97,6 +106,130 @@ class GaiaFileFetcherTool(Tool):
         return path
 # --------------------------------------------------
 # Agent wrapper
 # --------------------------------------------------
@@ -104,15 +237,24 @@ class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         model = OpenAIServerModel(
-            model_id="llama-3.3-70b-versatile",
-            api_base="https://api.groq.com/openai/v1",
-            api_key=os.getenv("HF_TOKEN"),
         )
         self.file_tool = GaiaFileFetcherTool(
             api_url=DEFAULT_API_URL,
         )
         self.agent = CodeAgent(
             model=model,
@@ -123,10 +265,13 @@ class BasicAgent:
                 ),
                 VisitWebpageTool(),
                 self.file_tool,
             ],
             max_steps=15,
             verbosity_level=0,
             additional_authorized_imports=[
                 "json",
                 "re",
                 "csv",
@@ -136,6 +281,10 @@ class BasicAgent:
                 "collections",
                 "itertools",
                 "os",
             ],
         )
@@ -153,7 +302,10 @@ class BasicAgent:
                 f"\n\n[This question has an attached "
                 f"file. Use the fetch_task_file tool "
                 f"with task_id='{task_id}' to "
-                f"download and read it.]"
             )
         prompt += ANSWER_FORMAT_INSTRUCTIONS
@@ -198,7 +350,7 @@ def run_and_submit_all(
     agent_code = (
         f"https://huggingface.co/spaces/"
-        f"{space_id}/tree/main"
     )
     print(agent_code)
@@ -448,6 +600,8 @@ page fetching, and file download tools.*
         outputs=[status_output, results_table],
     )
 if __name__ == "__main__":
     print(
         "\n" + "-" * 30
@@ -469,4 +623,4 @@ if __name__ == "__main__":
     print("-" * 74 + "\n")
     print("Launching Gradio Interface...")
-    demo.launch(debug=True, share=False)

 import os
+import base64
+import mimetypes
 import requests
 import pandas as pd
 import gradio as gr
 DEFAULT_API_URL = (
     "https://agents-course-unit4-scoring.hf.space"
 )
+GROQ_API_BASE = "https://api.groq.com/openai/v1"
+TEXT_MODEL_ID = "llama-3.3-70b-versatile"
+VISION_MODEL_ID = (
+    "meta-llama/llama-4-scout-17b-16e-instruct"
+)
+AUDIO_MODEL_ID = "whisper-large-v3"
 # Format instructions appended to every question
 # so that the agent returns exact-match-friendly
         if not fname:
             fname = f"{task_id}{ext}"
+        fname = os.path.basename(fname)
         path = os.path.join(
             _tmp.gettempdir(), fname
         )
         return path
+class GroqAudioTranscriptionTool(Tool):
+    """Transcribes an audio file with Groq Whisper."""
+    name = "transcribe_audio_file"
+    description = (
+        "Transcribes a local audio file path, such as an "
+        "MP3 downloaded with fetch_task_file. Returns the "
+        "plain transcript text."
+    )
+    inputs = {
+        "file_path": {
+            "type": "string",
+            "description": "Local path to the audio file.",
+        }
+    }
+    output_type = "string"
+    def forward(self, file_path: str) -> str:
+        api_key = os.getenv("GROQ_API_KEY")
+        if not api_key:
+            raise RuntimeError(
+                "GROQ_API_KEY is required for audio transcription."
+            )
+        with open(file_path, "rb") as audio_file:
+            response = requests.post(
+                f"{GROQ_API_BASE}/audio/transcriptions",
+                headers={
+                    "Authorization": f"Bearer {api_key}",
+                },
+                files={
+                    "file": (
+                        os.path.basename(file_path),
+                        audio_file,
+                    )
+                },
+                data={
+                    "model": AUDIO_MODEL_ID,
+                    "response_format": "json",
+                    "temperature": "0",
+                },
+                timeout=120,
+            )
+        response.raise_for_status()
+        return response.json().get("text", "").strip()
+class GroqImageAnalysisTool(Tool):
+    """Answers questions about a local image with Groq vision."""
+    name = "analyze_image_file"
+    description = (
+        "Analyzes a local image file path and answers a "
+        "specific visual question about it."
+    )
+    inputs = {
+        "file_path": {
+            "type": "string",
+            "description": "Local path to the image file.",
+        },
+        "question": {
+            "type": "string",
+            "description": "The question to answer about the image.",
+        },
+    }
+    output_type = "string"
+    def forward(self, file_path: str, question: str) -> str:
+        api_key = os.getenv("GROQ_API_KEY")
+        if not api_key:
+            raise RuntimeError(
+                "GROQ_API_KEY is required for image analysis."
+            )
+        mime_type = (
+            mimetypes.guess_type(file_path)[0]
+            or "application/octet-stream"
+        )
+        with open(file_path, "rb") as image_file:
+            encoded = base64.b64encode(
+                image_file.read()
+            ).decode("ascii")
+        response = requests.post(
+            f"{GROQ_API_BASE}/chat/completions",
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            },
+            json={
+                "model": VISION_MODEL_ID,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": question,
+                            },
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": (
+                                        f"data:{mime_type};"
+                                        f"base64,{encoded}"
+                                    )
+                                },
+                            },
+                        ],
+                    }
+                ],
+                "temperature": 0.1,
+                "max_completion_tokens": 512,
+            },
+            timeout=120,
+        )
+        response.raise_for_status()
+        return (
+            response.json()["choices"][0]["message"]
+            ["content"]
+            .strip()
+        )
 # --------------------------------------------------
 # Agent wrapper
 # --------------------------------------------------
     def __init__(self):
         print("BasicAgent initialized.")
+        groq_api_key = os.getenv("GROQ_API_KEY")
+        if not groq_api_key:
+            raise RuntimeError(
+                "Missing GROQ_API_KEY. Add it to your "
+                "Hugging Face Space secrets or local .env file."
+            )
         model = OpenAIServerModel(
+            model_id=TEXT_MODEL_ID,
+            api_base=GROQ_API_BASE,
+            api_key=groq_api_key,
         )
         self.file_tool = GaiaFileFetcherTool(
             api_url=DEFAULT_API_URL,
         )
+        self.audio_tool = GroqAudioTranscriptionTool()
+        self.image_tool = GroqImageAnalysisTool()
         self.agent = CodeAgent(
             model=model,
                 ),
                 VisitWebpageTool(),
                 self.file_tool,
+                self.audio_tool,
+                self.image_tool,
             ],
             max_steps=15,
             verbosity_level=0,
             additional_authorized_imports=[
+                "base64",
                 "json",
                 "re",
                 "csv",
                 "collections",
                 "itertools",
                 "os",
+                "pathlib",
+                "mimetypes",
+                "pandas",
+                "openpyxl",
             ],
         )
                 f"\n\n[This question has an attached "
                 f"file. Use the fetch_task_file tool "
                 f"with task_id='{task_id}' to "
+                f"download it. If it is audio, use "
+                f"transcribe_audio_file. If it is an "
+                f"image, use analyze_image_file. If it "
+                f"is a spreadsheet, read it with pandas.]"
             )
         prompt += ANSWER_FORMAT_INSTRUCTIONS
     agent_code = (
         f"https://huggingface.co/spaces/"
+        f"{space_id or 'unknown-space'}/tree/main"
     )
     print(agent_code)
         outputs=[status_output, results_table],
     )
+demo.queue()
 if __name__ == "__main__":
     print(
         "\n" + "-" * 30
     print("-" * 74 + "\n")
     print("Launching Gradio Interface...")
+    demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -1,6 +1,7 @@
 gradio==5.25.2
 requests==2.32.5
 pandas==2.3.3
 smolagents[openai]==1.24.0
 ddgs==9.14.0
 wikipedia-api==0.10.2

 gradio==5.25.2
 requests==2.32.5
 pandas==2.3.3
+openpyxl==3.1.5
 smolagents[openai]==1.24.0
 ddgs==9.14.0
 wikipedia-api==0.10.2