Commit
·
2ae28e5
1
Parent(s):
b6137ed
trying other models
Browse files
app.py
CHANGED
|
@@ -14,9 +14,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
|
|
| 14 |
|
| 15 |
# --- Constants ---
|
| 16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 17 |
-
# Switched to a more reliable and fast model available on the free Inference API
|
| 18 |
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
|
| 19 |
-
# Updated prompt template to match the Mistral format
|
| 20 |
PROMPT_TEMPLATE = """<s>[INST]You are a helpful assistant designed to answer questions accurately. You have access to the following tools:
|
| 21 |
|
| 22 |
{tools_description}
|
|
@@ -32,12 +30,15 @@ When you have the final answer, respond with:
|
|
| 32 |
Thought: I have now found the final answer.
|
| 33 |
Final Answer: The final answer.
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
Question: {question}
|
| 38 |
[/INST]{scratchpad}"""
|
| 39 |
|
| 40 |
-
|
| 41 |
# --- Tool Definitions ---
|
| 42 |
|
| 43 |
class WebSearchTool:
|
|
@@ -105,21 +106,24 @@ class FileReaderTool:
|
|
| 105 |
content = "\n".join(para.text for para in doc.paragraphs)
|
| 106 |
elif file_name.endswith('.csv'):
|
| 107 |
df = pd.read_csv(file_content)
|
| 108 |
-
content = df.
|
| 109 |
elif file_name.endswith('.json'):
|
| 110 |
data = json.load(file_content)
|
| 111 |
content = json.dumps(data, indent=2)
|
| 112 |
elif file_name.endswith('.txt'):
|
| 113 |
content = file_content.read().decode('utf-8')
|
|
|
|
|
|
|
|
|
|
| 114 |
else:
|
| 115 |
-
return f"Error: Unsupported file type for '{file_name}'. Supported types: .pdf, .docx, .csv, .json, .txt."
|
| 116 |
return content if content else "File is empty."
|
| 117 |
except Exception as e:
|
| 118 |
return f"Error reading file '{file_name}': {e}"
|
| 119 |
|
| 120 |
@property
|
| 121 |
def description(self):
|
| 122 |
-
return 'file_reader(task_id: str, file_name: str) -> str - Reads content of text-based files (.pdf, .docx, .csv, .json, .txt). For audio, use audio_transcriber.'
|
| 123 |
|
| 124 |
class AudioTranscriptionTool:
|
| 125 |
"""A tool to transcribe audio files using the Hugging Face Inference API."""
|
|
@@ -134,8 +138,12 @@ class AudioTranscriptionTool:
|
|
| 134 |
response = requests.get(file_url, timeout=30)
|
| 135 |
response.raise_for_status()
|
| 136 |
audio_data = response.content
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
except Exception as e:
|
| 140 |
return f"Error during audio transcription: {e}"
|
| 141 |
|
|
@@ -161,7 +169,6 @@ class YouTubeTranscriptTool:
|
|
| 161 |
def description(self):
|
| 162 |
return 'youtube_transcript_fetcher(video_url: str) -> str - Fetches the transcript of a YouTube video. Use for questions about video content.'
|
| 163 |
|
| 164 |
-
|
| 165 |
# --- GAIA Agent Definition ---
|
| 166 |
class GaiaAgent:
|
| 167 |
def __init__(self, hf_token: str, api_url: str, max_turns: int = 8):
|
|
@@ -291,7 +298,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 291 |
error_detail = f"Server responded with status {e.response.status_code}. Response: {e.response.text[:500]}" if e.response else str(e)
|
| 292 |
return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
|
| 293 |
|
| 294 |
-
|
| 295 |
# --- Gradio Interface ---
|
| 296 |
with gr.Blocks() as demo:
|
| 297 |
gr.Markdown("# GAIA Agent Evaluation Runner")
|
|
|
|
| 14 |
|
| 15 |
# --- Constants ---
|
| 16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
| 17 |
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
|
|
|
|
| 18 |
PROMPT_TEMPLATE = """<s>[INST]You are a helpful assistant designed to answer questions accurately. You have access to the following tools:
|
| 19 |
|
| 20 |
{tools_description}
|
|
|
|
| 30 |
Thought: I have now found the final answer.
|
| 31 |
Final Answer: The final answer.
|
| 32 |
|
| 33 |
+
Important:
|
| 34 |
+
- Do not use a tool if you are not sure about the parameters.
|
| 35 |
+
- Do not make up file names.
|
| 36 |
+
- If a tool is not available for a task (e.g., image analysis), state that you cannot answer.
|
| 37 |
+
- If a tool returns an error, note it and try an alternative approach if possible.
|
| 38 |
|
| 39 |
Question: {question}
|
| 40 |
[/INST]{scratchpad}"""
|
| 41 |
|
|
|
|
| 42 |
# --- Tool Definitions ---
|
| 43 |
|
| 44 |
class WebSearchTool:
|
|
|
|
| 106 |
content = "\n".join(para.text for para in doc.paragraphs)
|
| 107 |
elif file_name.endswith('.csv'):
|
| 108 |
df = pd.read_csv(file_content)
|
| 109 |
+
content = df.to_json(orient='records') # Return JSON for easier processing
|
| 110 |
elif file_name.endswith('.json'):
|
| 111 |
data = json.load(file_content)
|
| 112 |
content = json.dumps(data, indent=2)
|
| 113 |
elif file_name.endswith('.txt'):
|
| 114 |
content = file_content.read().decode('utf-8')
|
| 115 |
+
elif file_name.endswith('.xlsx'):
|
| 116 |
+
df = pd.read_excel(file_content, engine='openpyxl')
|
| 117 |
+
content = df.to_json(orient='records') # Return JSON for easier processing
|
| 118 |
else:
|
| 119 |
+
return f"Error: Unsupported file type for '{file_name}'. Supported types: .pdf, .docx, .csv, .json, .txt, .xlsx."
|
| 120 |
return content if content else "File is empty."
|
| 121 |
except Exception as e:
|
| 122 |
return f"Error reading file '{file_name}': {e}"
|
| 123 |
|
| 124 |
@property
|
| 125 |
def description(self):
|
| 126 |
+
return 'file_reader(task_id: str, file_name: str) -> str - Reads content of text-based files (.pdf, .docx, .csv, .json, .txt, .xlsx). For audio, use audio_transcriber.'
|
| 127 |
|
| 128 |
class AudioTranscriptionTool:
|
| 129 |
"""A tool to transcribe audio files using the Hugging Face Inference API."""
|
|
|
|
| 138 |
response = requests.get(file_url, timeout=30)
|
| 139 |
response.raise_for_status()
|
| 140 |
audio_data = response.content
|
| 141 |
+
# Specify Whisper-large-v2 for accurate transcription
|
| 142 |
+
transcription = self.client.automatic_speech_recognition(audio_data, model="openai/whisper-large-v2")
|
| 143 |
+
if transcription and 'text' in transcription:
|
| 144 |
+
return transcription['text']
|
| 145 |
+
else:
|
| 146 |
+
return "Could not transcribe audio."
|
| 147 |
except Exception as e:
|
| 148 |
return f"Error during audio transcription: {e}"
|
| 149 |
|
|
|
|
| 169 |
def description(self):
|
| 170 |
return 'youtube_transcript_fetcher(video_url: str) -> str - Fetches the transcript of a YouTube video. Use for questions about video content.'
|
| 171 |
|
|
|
|
| 172 |
# --- GAIA Agent Definition ---
|
| 173 |
class GaiaAgent:
|
| 174 |
def __init__(self, hf_token: str, api_url: str, max_turns: int = 8):
|
|
|
|
| 298 |
error_detail = f"Server responded with status {e.response.status_code}. Response: {e.response.text[:500]}" if e.response else str(e)
|
| 299 |
return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
|
| 300 |
|
|
|
|
| 301 |
# --- Gradio Interface ---
|
| 302 |
with gr.Blocks() as demo:
|
| 303 |
gr.Markdown("# GAIA Agent Evaluation Runner")
|