Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,13 @@ import gradio as gr
|
|
| 3 |
import requests
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# (Keep Constants as is)
|
| 8 |
# --- Constants ---
|
|
@@ -11,189 +18,277 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 11 |
# --- Basic Agent Definition ---
|
| 12 |
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
|
| 14 |
-
class
|
| 15 |
def __init__(self):
|
| 16 |
-
self.
|
| 17 |
self.setup_agent()
|
| 18 |
|
| 19 |
-
def
|
| 20 |
-
"""Configura i
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def setup_agent(self):
|
| 33 |
-
"""Configura l'agente con
|
|
|
|
|
|
|
|
|
|
| 34 |
self.agent = CodeAgent(
|
| 35 |
tools=[
|
| 36 |
-
self.
|
| 37 |
-
self.
|
| 38 |
-
self.
|
| 39 |
-
self.
|
| 40 |
-
self.
|
| 41 |
],
|
| 42 |
-
model=
|
| 43 |
-
max_iterations=
|
| 44 |
-
verbosity=
|
| 45 |
)
|
| 46 |
|
| 47 |
@tool
|
| 48 |
-
def
|
| 49 |
-
"""Analizza immagini usando
|
| 50 |
try:
|
| 51 |
-
|
| 52 |
-
|
|
|
|
| 53 |
|
|
|
|
| 54 |
prompt = f"""
|
| 55 |
Analizza questa immagine per rispondere alla domanda: {question}
|
| 56 |
|
| 57 |
-
|
| 58 |
-
-
|
| 59 |
-
-
|
| 60 |
-
-
|
| 61 |
-
-
|
| 62 |
-
|
| 63 |
-
Risposta:
|
| 64 |
"""
|
| 65 |
|
| 66 |
-
response = self.
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
except Exception as e:
|
| 69 |
-
return f"Errore
|
| 70 |
|
| 71 |
@tool
|
| 72 |
-
def
|
| 73 |
-
"""Trascrizione audio
|
| 74 |
try:
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
except Exception as e:
|
| 78 |
-
return f"Errore
|
| 79 |
|
| 80 |
@tool
|
| 81 |
-
def
|
| 82 |
-
"""Estrae testo da
|
| 83 |
try:
|
|
|
|
|
|
|
| 84 |
if file_path.endswith('.txt'):
|
| 85 |
with open(file_path, 'r', encoding='utf-8') as f:
|
| 86 |
-
|
| 87 |
elif file_path.endswith('.csv'):
|
| 88 |
import pandas as pd
|
| 89 |
df = pd.read_csv(file_path)
|
| 90 |
-
|
| 91 |
elif file_path.endswith(('.xlsx', '.xls')):
|
| 92 |
import pandas as pd
|
| 93 |
df = pd.read_excel(file_path)
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
except Exception as e:
|
| 98 |
-
return f"Errore
|
| 99 |
|
| 100 |
@tool
|
| 101 |
-
def
|
| 102 |
-
"""
|
| 103 |
try:
|
| 104 |
-
#
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
except Exception as e:
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
@tool
|
| 111 |
-
def
|
| 112 |
-
"""
|
| 113 |
try:
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
except Exception as e:
|
| 119 |
-
return f"Errore
|
| 120 |
|
| 121 |
def solve_question(self, question: str, file_path: Optional[str] = None) -> str:
|
| 122 |
-
"""Risolve
|
| 123 |
|
| 124 |
-
# Prompt engineering
|
| 125 |
system_prompt = f"""
|
| 126 |
-
Sei un agente AI specializzato nel
|
| 127 |
|
| 128 |
-
OBIETTIVO: Fornire risposte
|
| 129 |
|
| 130 |
-
|
| 131 |
-
1.
|
| 132 |
-
2. Se c'è un file
|
| 133 |
-
3.
|
| 134 |
-
4.
|
| 135 |
-
5.
|
| 136 |
-
6. NON aggiungere prefissi come "Risposta:", "Il risultato è:", etc.
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
|
| 145 |
DOMANDA: {question}
|
| 146 |
-
{f"FILE
|
| 147 |
|
| 148 |
-
Risolvi step-by-step e fornisci SOLO la risposta finale
|
| 149 |
"""
|
| 150 |
|
| 151 |
try:
|
| 152 |
-
# Esegui l'agente con il prompt ottimizzato
|
| 153 |
response = self.agent.run(system_prompt)
|
| 154 |
-
|
| 155 |
-
# Post-processing per garantire formato corretto
|
| 156 |
-
answer = self.clean_answer(response, question)
|
| 157 |
-
|
| 158 |
-
return answer
|
| 159 |
|
| 160 |
except Exception as e:
|
| 161 |
-
return f"Errore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
-
def
|
| 164 |
-
"""
|
| 165 |
|
| 166 |
# Rimuovi prefissi comuni
|
| 167 |
-
|
| 168 |
"Final Answer:", "Risposta:", "Answer:", "Il risultato è:",
|
| 169 |
-
"La risposta è:", "Risposta finale:", "ANSWER:", "RISPOSTA:"
|
|
|
|
| 170 |
]
|
| 171 |
|
| 172 |
cleaned = raw_answer.strip()
|
| 173 |
-
for prefix in
|
| 174 |
if cleaned.startswith(prefix):
|
| 175 |
cleaned = cleaned[len(prefix):].strip()
|
| 176 |
|
| 177 |
-
#
|
| 178 |
-
|
| 179 |
-
# Assicura formato lista separata da virgole
|
| 180 |
-
cleaned = re.sub(r'\s*,\s*', ', ', cleaned)
|
| 181 |
|
| 182 |
-
if "
|
| 183 |
-
# Estrai solo
|
| 184 |
-
numbers = re.findall(r'\d+
|
| 185 |
if numbers:
|
| 186 |
-
|
| 187 |
|
| 188 |
-
if "yes" in
|
| 189 |
-
#
|
| 190 |
if "yes" in cleaned.lower():
|
| 191 |
-
|
| 192 |
elif "no" in cleaned.lower():
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
return cleaned.strip()
|
| 196 |
|
|
|
|
| 197 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 198 |
"""
|
| 199 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
|
@@ -316,35 +411,131 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 316 |
|
| 317 |
|
| 318 |
# --- Build Gradio Interface using Blocks ---
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
"""
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
gr.
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
-
run_button.click(
|
| 345 |
-
fn=run_and_submit_all,
|
| 346 |
-
outputs=[status_output, results_table]
|
| 347 |
-
)
|
| 348 |
|
| 349 |
if __name__ == "__main__":
|
| 350 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
@@ -368,4 +559,5 @@ if __name__ == "__main__":
|
|
| 368 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 369 |
|
| 370 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 371 |
-
|
|
|
|
|
|
import base64
import inspect
import io
import json
import os
import re
from typing import Any, Dict, List, Optional

import pandas as pd
import requests
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, tool, HfApiModel
|
| 13 |
|
| 14 |
# (Keep Constants as is)
|
| 15 |
# --- Constants ---
|
|
|
|
| 18 |
# --- Basic Agent Definition ---
|
| 19 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
| 20 |
|
| 21 |
+
class GAIAAgentHF:
    """GAIA Level 1 agent backed exclusively by Hugging Face hosted models.

    A smolagents ``CodeAgent`` drives the reasoning loop; specialised
    sub-tasks (vision, audio transcription, file analysis, math,
    summarisation) are delegated to hosted models through
    ``huggingface_hub.InferenceClient``.
    """

    def __init__(self):
        self.setup_hf_clients()
        self.setup_agent()

    def setup_hf_clients(self):
        """Configure the Hugging Face API client and the per-task model map."""
        # NOTE(review): anonymous access is rate-limited; set
        # HUGGINGFACE_HUB_TOKEN in the environment for reliable inference.
        self.hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")

        # Main client for all Inference API calls.
        # BUG FIX: the original subscripted the client object ("[4][8]"
        # citation artifacts pasted into the code), raising TypeError here.
        self.inference_client = InferenceClient(token=self.hf_token)

        # Hosted models available via the Inference API, keyed by task.
        self.models = {
            "vision": "microsoft/kosmos-2-patch14-224",  # multimodal image analysis
            "audio": "openai/whisper-large-v3",          # audio transcription
            "reasoning": "microsoft/DialoGPT-medium",    # reasoning / chat
            "math": "microsoft/DialoGPT-medium",         # math fallback
            "summarization": "facebook/bart-large-cnn",  # summarisation
        }

    def setup_agent(self):
        """Configure the CodeAgent with the HF API model and the tool set."""
        # HfApiModel performs the main reasoning via the HF Inference API.
        model = HfApiModel(model_id="microsoft/DialoGPT-medium", token=self.hf_token)

        # NOTE(review): smolagents @tool is designed for standalone
        # functions; passing bound methods here should be verified against
        # the installed smolagents version, as should the keyword names
        # max_iterations/verbosity.
        self.agent = CodeAgent(
            tools=[
                self.analyze_image_hf,
                self.transcribe_audio_hf,
                self.extract_and_analyze_text,
                self.perform_calculation,
                self.summarize_content,
            ],
            model=model,
            max_iterations=8,
            verbosity=1,
        )

    @tool
    def analyze_image_hf(self, image_path: str, question: str) -> str:
        """Analyze an image with the hosted vision model.

        Args:
            image_path: Path to the image file on disk.
            question: The question the image should answer.

        Returns:
            The model's cleaned answer, or an error message string.
        """
        try:
            # base64-encode the image for the API payload.
            # NOTE(review): InferenceClient's VQA endpoint also accepts raw
            # bytes or a path — confirm base64 is what this model expects.
            with open(image_path, "rb") as img_file:
                img_data = base64.b64encode(img_file.read()).decode()

            # GAIA-oriented prompt (model-facing text, kept in Italian).
            prompt = f"""
            Analizza questa immagine per rispondere alla domanda: {question}

            Istruzioni specifiche:
            - Se devi contare oggetti: fornisci il numero esatto
            - Se devi leggere testo: trascrivi letteralmente
            - Se devi identificare posizioni: usa riferimenti precisi
            - Rispondi solo con l'informazione richiesta, senza prefissi
            """

            response = self.inference_client.visual_question_answering(
                image=img_data,
                question=prompt,
                model=self.models["vision"],
            )

            return self._clean_response(response)

        except Exception as e:
            return f"Errore analisi immagine: {str(e)}"

    @tool
    def transcribe_audio_hf(self, audio_path: str) -> str:
        """Transcribe an audio file with Whisper via the HF Inference API.

        Args:
            audio_path: Path to the audio file on disk.

        Returns:
            The transcription text, or an error message string.
        """
        try:
            with open(audio_path, "rb") as audio_file:
                audio_data = audio_file.read()

            response = self.inference_client.automatic_speech_recognition(
                audio_data,
                model=self.models["audio"],
            )

            # NOTE(review): newer huggingface_hub versions return a typed
            # object with a .text attribute rather than a dict — verify.
            return response.get("text", "Trascrizione non disponibile")

        except Exception as e:
            return f"Errore trascrizione: {str(e)}"

    @tool
    def extract_and_analyze_text(self, file_path: str, question: str) -> str:
        """Extract text from a file; delegate long content to an HF model.

        Supports .txt, .csv and .xlsx/.xls files. Unknown extensions yield
        an empty string.

        Args:
            file_path: Path to the input file.
            question: The question the content should answer.

        Returns:
            Raw content for short files, a model answer for long ones,
            or an error message string.
        """
        try:
            # Basic per-extension text extraction.
            content = ""
            if file_path.endswith('.txt'):
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            elif file_path.endswith('.csv'):
                import pandas as pd
                df = pd.read_csv(file_path)
                content = df.to_string()
            elif file_path.endswith(('.xlsx', '.xls')):
                import pandas as pd
                df = pd.read_excel(file_path)
                content = df.to_string()

            # Long content: let an HF model do the analysis on a prefix.
            if len(content) > 1000:
                summary_prompt = f"""
                Analizza questo contenuto per rispondere alla domanda: {question}

                Contenuto: {content[:2000]}...

                Fornisci una risposta precisa e diretta.
                """

                response = self.inference_client.text_generation(
                    summary_prompt,
                    model=self.models["summarization"],
                    max_new_tokens=200,
                )
                return response

            return content

        except Exception as e:
            return f"Errore elaborazione file: {str(e)}"

    @tool
    def perform_calculation(self, expression: str, context: str = "") -> str:
        """Evaluate an arithmetic expression, with an HF-model fallback.

        Args:
            expression: The arithmetic expression to evaluate.
            context: Optional hint ("decimal"/"integer") for output format.

        Returns:
            The formatted result, or an error message string.
        """
        try:
            # Whitelist digits, operators and parentheses before eval.
            # SECURITY NOTE: eval on user-derived text is risky even after
            # filtering; an AST-based arithmetic evaluator would be safer.
            import re
            safe_expr = re.sub(r'[^0-9+\-*/().\s]', '', expression)

            result = eval(safe_expr)

            # Format according to the requested context.
            if "decimal" in context.lower():
                return f"{result:.6f}".rstrip('0').rstrip('.')
            elif "integer" in context.lower():
                return str(int(result))
            else:
                return str(result)

        except Exception as e:
            # Fallback: ask an HF model for complex calculations.
            try:
                calc_prompt = f"Calcola: {expression}. Fornisci solo il risultato numerico."
                response = self.inference_client.text_generation(
                    calc_prompt,
                    model=self.models["math"],
                    max_new_tokens=50,
                )
                return self._extract_number(response)
            except Exception:
                # BUG FIX: was a bare ``except:`` which also swallowed
                # SystemExit/KeyboardInterrupt.
                return f"Errore calcolo: {str(e)}"

    @tool
    def summarize_content(self, text: str, focus: str = "") -> str:
        """Summarise text with BART via the HF API, optionally focused.

        Args:
            text: The text to summarise.
            focus: Optional topic to steer the summary toward.

        Returns:
            The summary text, or an error message string.
        """
        try:
            if focus:
                prompt = f"Riassumi questo testo focalizzandoti su: {focus}\n\nTesto: {text}"
            else:
                prompt = text

            # NOTE(review): check whether this huggingface_hub version
            # accepts max_length/min_length directly or via a parameters dict.
            response = self.inference_client.summarization(
                prompt,
                model=self.models["summarization"],
                max_length=150,
                min_length=30,
            )

            return response[0]["summary_text"] if isinstance(response, list) else response

        except Exception as e:
            return f"Errore summarization: {str(e)}"

    def solve_question(self, question: str, file_path: Optional[str] = None) -> str:
        """Solve a GAIA question with an exact-match-oriented prompt.

        Args:
            question: The GAIA question text.
            file_path: Optional path to an attached file.

        Returns:
            The formatted final answer, or an error message string.
        """
        # Prompt engineering targeting GAIA Level 1 exact-match scoring
        # (model-facing text, kept in Italian).
        system_prompt = f"""
        Sei un agente AI specializzato nel benchmark GAIA Level 1.

        OBIETTIVO CRITICO: Fornire risposte in formato EXACT MATCH.

        STRATEGIA:
        1. Analizza la domanda per identificare il tipo di risposta richiesta
        2. Se c'è un file, analizzalo completamente prima di procedere
        3. Usa gli strumenti appropriati per ogni tipo di task
        4. Verifica che la risposta sia nel formato esatto richiesto
        5. NON aggiungere prefissi, suffissi o spiegazioni extra

        FORMATI COMUNI GAIA:
        - Numeri: solo il valore (es. "42")
        - Liste: formato specificato nella domanda
        - Date: formato richiesto (es. "2023-01-15")
        - Yes/No: "Yes" o "No" esatti
        - Testo: risposta diretta senza elaborazioni

        DOMANDA: {question}
        {f"FILE DISPONIBILE: {file_path}" if file_path else ""}

        Risolvi step-by-step e fornisci SOLO la risposta finale.
        """

        try:
            response = self.agent.run(system_prompt)
            return self._format_final_answer(response, question)

        except Exception as e:
            return f"Errore risoluzione: {str(e)}"

    def _clean_response(self, response: Any) -> str:
        """Normalise heterogeneous HF API responses to a plain string."""
        if isinstance(response, dict):
            if "generated_text" in response:
                return response["generated_text"].strip()
            elif "answer" in response:
                return response["answer"].strip()
        elif isinstance(response, list) and response:
            return str(response[0]).strip()
        return str(response).strip()

    def _extract_number(self, text: str) -> str:
        """Return the first number found in *text*, else the stripped text."""
        import re
        numbers = re.findall(r'-?\d+(?:\.\d+)?', text)
        return numbers[0] if numbers else text.strip()

    def _format_final_answer(self, raw_answer: str, question: str) -> str:
        """Normalise the agent's output for GAIA exact-match scoring."""
        # BUG FIX: ``re`` was used below before its branch-local import.
        import re

        # Strip common answer prefixes.
        prefixes = [
            "Final Answer:", "Risposta:", "Answer:", "Il risultato è:",
            "La risposta è:", "Risposta finale:", "ANSWER:", "RISPOSTA:",
            "The answer is:", "Result:", "Output:"
        ]

        cleaned = raw_answer.strip()
        for prefix in prefixes:
            if cleaned.startswith(prefix):
                cleaned = cleaned[len(prefix):].strip()

        # Question-type-specific formatting.
        question_lower = question.lower()

        if "how many" in question_lower or "count" in question_lower:
            # Counting questions: keep only the first number.
            numbers = re.findall(r'\d+', cleaned)
            if numbers:
                return numbers[0]

        if "yes or no" in question_lower or ("yes" in question_lower and "no" in question_lower):
            # Standardise yes/no answers.
            if "yes" in cleaned.lower():
                return "Yes"
            elif "no" in cleaned.lower():
                return "No"

        if "list" in question_lower and "comma" in question_lower:
            # Normalise comma-separated lists.
            cleaned = re.sub(r'\s*,\s*', ', ', cleaned)

        return cleaned.strip()
|
| 290 |
|
| 291 |
+
|
| 292 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 293 |
"""
|
| 294 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
|
|
|
| 411 |
|
| 412 |
|
| 413 |
# --- Build Gradio Interface using Blocks ---
|
| 414 |
+
class GAIAEvaluatorHF:
    """Thin client for the GAIA scoring API: fetch a question, solve, submit."""

    def __init__(self):
        self.base_url = "https://huggingface.co/spaces/huggingface-projects/gaia-benchmark-scoring/api"
        self.agent = GAIAAgentHF()

    def run_single_question(self, username: str) -> Dict:
        """Fetch one random question, solve it, submit, and report the outcome.

        Args:
            username: Hugging Face username (currently unused by the API calls).

        Returns:
            A dict with task_id/question/answer/result/file_used on success,
            or {"error": ...} on failure.
        """
        try:
            # Fetch a random question.
            # BUG FIX (robustness): requests without a timeout can hang forever.
            response = requests.get(f"{self.base_url}/random-question", timeout=30)
            question_data = response.json()

            task_id = question_data.get("task_id")
            # NOTE(review): the capitalised "Question" key should be verified
            # against the scoring API's actual payload schema.
            question_text = question_data.get("Question")

            # Download the attached file, if any.
            file_path = self._download_file(task_id)

            # Solve with the agent.
            answer = self.agent.solve_question(question_text, file_path)

            # Submit the answer for scoring.
            result = self._submit_answer(task_id, answer)

            return {
                "task_id": task_id,
                "question": question_text,
                "answer": answer,
                "result": result,
                "file_used": file_path is not None,
            }

        except Exception as e:
            return {"error": str(e)}

    def _download_file(self, task_id: str) -> Optional[str]:
        """Download the file attached to *task_id*; return its path or None."""
        try:
            response = requests.get(f"{self.base_url}/files/{task_id}", timeout=30)
            if response.status_code == 200:
                filename = f"task_{task_id}_file"
                with open(filename, 'wb') as f:
                    f.write(response.content)
                return filename
        except Exception:
            # BUG FIX: was a bare ``except:`` which also caught
            # SystemExit/KeyboardInterrupt; download is deliberately
            # best-effort, so we still swallow ordinary failures.
            pass
        return None

    def _submit_answer(self, task_id: str, answer: str) -> Dict:
        """Submit *answer* for *task_id*; return the API's JSON verdict."""
        payload = {"task_id": task_id, "submitted_answer": answer.strip()}
        try:
            response = requests.post(f"{self.base_url}/submit", json=payload, timeout=30)
            return response.json()
        except Exception as e:
            return {"error": str(e)}
|
| 470 |
|
| 471 |
+
def create_interface():
    """Build and return the Gradio Blocks UI for single-question GAIA testing.

    Wires a username textbox and a "test random question" button to
    GAIAEvaluatorHF.run_single_question, rendering the outcome as Markdown.
    """
    evaluator = GAIAEvaluatorHF()

    def test_single_question(username):
        # Require a username before hitting the scoring API.
        if not username:
            return "⚠️ Inserisci il tuo username Hugging Face"

        result = evaluator.run_single_question(username)

        # run_single_question returns {"error": ...} on any failure.
        if "error" in result:
            return f"❌ Errore: {result['error']}"

        # NOTE(review): the "correct" key is assumed to exist in the scoring
        # API's verdict — confirm against the actual response schema.
        status = "✅ CORRETTO" if result["result"].get("correct", False) else "❌ SBAGLIATO"
        file_info = "📁 Con file allegato" if result["file_used"] else "📄 Solo testo"

        # Markdown report shown in the output panel (user-facing, Italian).
        return f"""
        ## 🧪 Test Singola Domanda GAIA

        **Status:** {status}
        **Task ID:** {result['task_id']}
        **Tipo:** {file_info}

        ### 📝 Domanda:
        {result['question']}

        ### 🤖 Risposta dell'Agente:
        `{result['answer']}`

        ### 📊 Risultato Valutazione:
        {json.dumps(result['result'], indent=2)}
        """

    # Gradio interface layout.
    with gr.Blocks(title="🏆 GAIA Agent - HF API Version") as iface:
        gr.Markdown("# 🏆 GAIA Benchmark Agent - Hugging Face API")
        gr.Markdown("Agente ottimizzato per GAIA Level 1 usando esclusivamente modelli Hugging Face via API")

        with gr.Row():
            username_input = gr.Textbox(
                label="Username Hugging Face",
                placeholder="il-tuo-username",
                value=""
            )
            test_btn = gr.Button("🧪 Testa Domanda Random", variant="primary")

        output_display = gr.Markdown()

        test_btn.click(
            fn=test_single_question,
            inputs=[username_input],
            outputs=[output_display]
        )

        # Static usage notes shown under the controls (user-facing, Italian).
        gr.Markdown("""
        ### 🔧 Configurazione Richiesta:
        1. Imposta `HUGGINGFACE_HUB_TOKEN` nelle variabili d'ambiente
        2. Il token deve avere permessi per Inference API
        3. Assicurati di avere accesso ai modelli utilizzati

        ### 🎯 Modelli Utilizzati:
        - **Vision**: microsoft/kosmos-2-patch14-224
        - **Audio**: openai/whisper-large-v3
        - **Reasoning**: microsoft/DialoGPT-medium
        - **Summarization**: facebook/bart-large-cnn
        """)

    return iface
|
| 538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
if __name__ == "__main__":
|
| 541 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
|
| 559 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 560 |
|
| 561 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 562 |
+
iface = create_interface()
|
| 563 |
+
iface.launch()
|