MohamedAliAmiraa committed on
Commit
d10e815
·
verified ·
1 Parent(s): 20ff901

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -76
app.py CHANGED
@@ -22,87 +22,109 @@ except ImportError:
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
- # --- Agent Definition: The 'Router' Strategy ---
26
  class BasicAgent:
27
  def __init__(self):
28
- print("Initializing Agent...")
29
  try:
30
  self.llm = AzureChatOpenAI(
31
  azure_endpoint="https://dsap.openai.azure.com/",
32
  api_key=os.environ["AZURE_API_KEY"],
33
  azure_deployment="GPT4o-INTERNSHIP",
34
  api_version="2024-08-01-preview",
35
- temperature=0.0, max_retries=3,
36
  )
37
  except KeyError:
38
  raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
39
  print("Agent initialized.")
40
 
41
- def __call__(self, task: Dict[str, Any]) -> str:
42
- question = task.get("question")
43
- print(f"\n--- New Task ---\nQuestion: {question[:100]}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- # STRATEGY 1: Handle YouTube URLs directly
46
- if "youtube.com" in question or "youtu.be" in question:
47
- url_match = re.search(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)', question)
48
- if url_match:
49
- try:
50
- video_id = url_match.group(1)
51
- transcript = " ".join([item['text'] for item in YouTubeTranscriptApi.get_transcript(video_id)])
52
- prompt = f"Based on the following transcript, please answer the question.\n\nTranscript:\n{transcript[:4000]}\n\nQuestion:\n{question}"
53
- return self.llm.invoke(prompt).content
54
- except Exception as e: return f"Error processing YouTube video: {e}"
55
 
56
- # STRATEGY 2: Handle File Attachments directly
57
- file_url = task.get("files", [None])[0]
58
- if file_url:
59
- print(f"File detected: {file_url}")
60
- code_to_run = ""
61
  if file_url.endswith('.xlsx'):
62
- code_to_run = f"import pandas as pd; df = pd.read_excel('{file_url}'); print(df.to_string())"
63
- elif file_url.endswith(('.mp3', '.wav')):
64
- # Inform the LLM that audio processing is complex and ask for confirmation
65
- return "This question requires analyzing an audio file. This can be time-consuming and complex. Please confirm if I should proceed with downloading and analyzing the audio."
66
  elif file_url.endswith('.py'):
67
- code_to_run = f"import requests; r = requests.get('{file_url}'); print(r.text)"
68
- else: # For images or other file types
69
- return "I cannot directly analyze images or this file type. Please describe the content of the file if possible."
70
-
71
- # Execute the generated code for Excel or Python files
72
- buffer = io.StringIO()
73
- try:
74
- with redirect_stdout(buffer):
75
- exec(code_to_run, {'pd': pd, 'requests': requests, 'io': io})
76
- file_content = buffer.getvalue()
77
- prompt = f"The content of the file has been extracted as follows:\n\n{file_content}\n\nPlease use this content to answer the original question.\n\nQuestion:\n{question}"
78
- return self.llm.invoke(prompt).content
79
- except Exception as e:
80
- return f"Failed to execute Python code for file analysis. Error: {e}"
81
 
82
- # STRATEGY 3: Default to Web Search for all other questions
 
 
 
 
83
  try:
84
- with DDGS() as ddgs:
85
- results = [r for r in ddgs.text(f"{question}", max_results=3)]
86
- if not results: return "Could not find information on the web."
87
-
88
- context = ""
89
- for result in results:
90
- context += f"Title: {result['title']}\nURL: {result['href']}\nSnippet: {result['body']}\n\n"
91
-
92
- prompt = f"Based on the following search results, please provide a direct and concise answer to the question.\n\nSearch Results:\n{context}\n\nQuestion:\n{question}"
93
- return self.llm.invoke(prompt).content
94
  except Exception as e:
95
- return f"An error occurred during web search: {e}"
96
 
97
- # --- Your Original Submission and Gradio Code ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def run_and_submit_all(profile: gr.OAuthProfile | None):
99
- """
100
- Fetches all questions, runs the BasicAgent on them, submits all answers,
101
- and displays the results.
102
- """
103
  space_id = os.getenv("SPACE_ID")
104
 
105
- if profile:
106
  username = f"{profile.username}"
107
  print(f"User logged in: {username}")
108
  else:
@@ -112,7 +134,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
112
  api_url, questions_url, submit_url = DEFAULT_API_URL, f"{DEFAULT_API_URL}/questions", f"{DEFAULT_API_URL}/submit"
113
 
114
  try: agent = BasicAgent()
115
- except Exception as e: return f"Error initializing agent: {e}", None
116
 
117
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
118
 
@@ -131,7 +153,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
131
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
132
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
133
  except Exception as e:
134
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
135
 
136
  if not answers_payload: return "Agent did not produce answers.", pd.DataFrame(results_log)
137
 
@@ -147,28 +169,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
147
  return final_status, pd.DataFrame(results_log)
148
  except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log)
149
 
150
-
151
  with gr.Blocks() as demo:
152
- gr.Markdown("# Basic Agent Evaluation Runner")
153
- gr.Markdown(
154
- """
155
- **Instructions:**
156
- 1. Please clone this space, then modify the code...
157
- 2. Log in to your Hugging Face account using the button below...
158
- 3. Click 'Run Evaluation & Submit All Answers'...
159
- """
160
- )
161
- # This is your original, correct interface structure
162
  gr.LoginButton()
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
165
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
166
-
167
- # The click event with NO 'inputs' argument. This is the correct way.
168
- run_button.click(
169
- fn=run_and_submit_all,
170
- outputs=[status_output, results_table]
171
- )
172
 
173
  if __name__ == "__main__":
174
  demo.launch(debug=True, share=False)
 
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
+ # --- Agent Definition: The 'Orchestrator' Strategy ---
26
  class BasicAgent:
27
  def __init__(self):
28
+ print("Initializing Orchestrator Agent...")
29
  try:
30
  self.llm = AzureChatOpenAI(
31
  azure_endpoint="https://dsap.openai.azure.com/",
32
  api_key=os.environ["AZURE_API_KEY"],
33
  azure_deployment="GPT4o-INTERNSHIP",
34
  api_version="2024-08-01-preview",
35
+ temperature=0.0, max_retries=2,
36
  )
37
  except KeyError:
38
  raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
39
  print("Agent initialized.")
40
 
41
+ # --- Tool Definitions ---
42
+ def search_and_browse(self, query: str) -> str:
43
+ """Searches the web with DuckDuckGo and browses the top results."""
44
+ print(f"Tool: search_and_browse, Query: {query}")
45
+ context = ""
46
+ try:
47
+ with DDGS() as ddgs:
48
+ results = [r for r in ddgs.text(query, max_results=3)]
49
+ if not results: return f"No results found for '{query}'."
50
+
51
+ for result in results:
52
+ try:
53
+ url = result['href']
54
+ response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
55
+ soup = BeautifulSoup(response.content, 'html.parser')
56
+ text = ' '.join(soup.get_text().split())
57
+ context += f"Source URL: {url}\nContent: {text[:1500]}\n\n"
58
+ except Exception as e:
59
+ context += f"Could not browse {url}: {e}\n\n"
60
+ return context
61
+ except Exception as e:
62
+ return f"Error during search: {e}"
63
 
64
+ def analyze_file(self, file_url: str) -> str:
65
+ """Downloads a file from a URL and extracts its content as text."""
66
+ print(f"Tool: analyze_file, URL: {file_url}")
67
+ try:
68
+ response = requests.get(file_url)
69
+ response.raise_for_status()
 
 
 
 
70
 
 
 
 
 
 
71
  if file_url.endswith('.xlsx'):
72
+ df = pd.read_excel(io.BytesIO(response.content))
73
+ return f"Excel file content:\n{df.to_string()}"
 
 
74
  elif file_url.endswith('.py'):
75
+ return f"Python file content:\n{response.text}"
76
+ elif file_url.endswith(('.mp3', '.wav')):
77
+ # Audio processing is complex. For this final version, we will state the limitation clearly.
78
+ return "Limitation: Audio file detected. I cannot transcribe audio to determine its content. Please describe the audio if possible."
79
+ else: # Images, etc.
80
+ return "Limitation: This file type (e.g., image) cannot be analyzed. Please describe the content of the file."
81
+ except Exception as e:
82
+ return f"Error analyzing file: {e}"
 
 
 
 
 
 
83
 
84
+ def process_youtube(self, question: str) -> str:
85
+ """Extracts transcript from a YouTube URL in the question and returns it."""
86
+ print(f"Tool: process_youtube")
87
+ url_match = re.search(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)', question)
88
+ if not url_match: return "No YouTube URL found."
89
  try:
90
+ video_id = url_match.group(1)
91
+ # This is the correct, static method call for the library
92
+ transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
93
+ return "YouTube Transcript: " + " ".join([item['text'] for item in transcript_list])
 
 
 
 
 
 
94
  except Exception as e:
95
+ return f"Error processing YouTube transcript: {e}"
96
 
97
+ # --- Main Orchestrator Logic ---
98
+ def __call__(self, task: Dict[str, Any]) -> str:
99
+ question = task.get("question")
100
+ print(f"\n--- New Task ---\nQuestion: {question[:150]}...")
101
+
102
+ context = ""
103
+ # 1. Check for a file URL first
104
+ file_url = task.get("files", [None])[0]
105
+ if file_url:
106
+ context = self.analyze_file(file_url)
107
+ # 2. Check for a YouTube URL in the question text
108
+ elif "youtube.com" in question or "youtu.be" in question:
109
+ context = self.process_youtube(question)
110
+ # 3. Default to web search for everything else
111
+ else:
112
+ context = self.search_and_browse(query=question)
113
+
114
+ # 4. Final step: Use the gathered context to generate an answer
115
+ final_prompt = f"Based ONLY on the following context, provide a direct and concise answer to the user's question. Do not use any other information.\n\nContext:\n{context}\n\nQuestion:\n{question}"
116
+ try:
117
+ final_answer = self.llm.invoke(final_prompt).content
118
+ print(f"Final Answer: {final_answer}")
119
+ return final_answer
120
+ except Exception as e:
121
+ return f"Error during final answer generation: {e}"
122
+
123
+ # --- Your Original, Correct Submission and Gradio Code ---
124
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
125
  space_id = os.getenv("SPACE_ID")
126
 
127
+ if profile and profile.username:
128
  username = f"{profile.username}"
129
  print(f"User logged in: {username}")
130
  else:
 
134
  api_url, questions_url, submit_url = DEFAULT_API_URL, f"{DEFAULT_API_URL}/questions", f"{DEFAULT_API_URL}/submit"
135
 
136
  try: agent = BasicAgent()
137
+ except Exception as e: return f"Error initializing agent: {e}\n\n{traceback.format_exc()}", None
138
 
139
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
140
 
 
153
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
154
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
155
  except Exception as e:
156
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {traceback.format_exc()}"})
157
 
158
  if not answers_payload: return "Agent did not produce answers.", pd.DataFrame(results_log)
159
 
 
169
  return final_status, pd.DataFrame(results_log)
170
  except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log)
171
 
172
+ # This is your original, correct interface structure that works.
173
  with gr.Blocks() as demo:
174
+ gr.Markdown("# Agent Evaluation Runner")
 
 
 
 
 
 
 
 
 
175
  gr.LoginButton()
176
  run_button = gr.Button("Run Evaluation & Submit All Answers")
177
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
178
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
179
+ # The click event with NO 'inputs' argument.
180
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
 
181
 
182
  if __name__ == "__main__":
183
  demo.launch(debug=True, share=False)