MohamedAliAmiraa committed on
Commit
2a3a0a8
·
verified ·
1 Parent(s): d10e815

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -98
app.py CHANGED
@@ -22,10 +22,10 @@ except ImportError:
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
- # --- Agent Definition: The 'Orchestrator' Strategy ---
26
  class BasicAgent:
27
  def __init__(self):
28
- print("Initializing Orchestrator Agent...")
29
  try:
30
  self.llm = AzureChatOpenAI(
31
  azure_endpoint="https://dsap.openai.azure.com/",
@@ -36,147 +36,168 @@ class BasicAgent:
36
  )
37
  except KeyError:
38
  raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
 
 
 
 
 
 
 
 
39
  print("Agent initialized.")
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # --- Tool Definitions ---
42
- def search_and_browse(self, query: str) -> str:
43
- """Searches the web with DuckDuckGo and browses the top results."""
44
- print(f"Tool: search_and_browse, Query: {query}")
45
- context = ""
46
  try:
47
  with DDGS() as ddgs:
48
- results = [r for r in ddgs.text(query, max_results=3)]
49
- if not results: return f"No results found for '{query}'."
50
-
51
- for result in results:
52
- try:
53
- url = result['href']
54
- response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
55
- soup = BeautifulSoup(response.content, 'html.parser')
56
- text = ' '.join(soup.get_text().split())
57
- context += f"Source URL: {url}\nContent: {text[:1500]}\n\n"
58
- except Exception as e:
59
- context += f"Could not browse {url}: {e}\n\n"
60
- return context
61
- except Exception as e:
62
- return f"Error during search: {e}"
63
 
64
- def analyze_file(self, file_url: str) -> str:
65
- """Downloads a file from a URL and extracts its content as text."""
66
- print(f"Tool: analyze_file, URL: {file_url}")
67
  try:
68
- response = requests.get(file_url)
69
- response.raise_for_status()
70
-
71
- if file_url.endswith('.xlsx'):
72
- df = pd.read_excel(io.BytesIO(response.content))
73
- return f"Excel file content:\n{df.to_string()}"
74
- elif file_url.endswith('.py'):
75
- return f"Python file content:\n{response.text}"
76
- elif file_url.endswith(('.mp3', '.wav')):
77
- # Audio processing is complex. For this final version, we will state the limitation clearly.
78
- return "Limitation: Audio file detected. I cannot transcribe audio to determine its content. Please describe the audio if possible."
79
- else: # Images, etc.
80
- return "Limitation: This file type (e.g., image) cannot be analyzed. Please describe the content of the file."
81
- except Exception as e:
82
- return f"Error analyzing file: {e}"
83
 
84
- def process_youtube(self, question: str) -> str:
85
- """Extracts transcript from a YouTube URL in the question and returns it."""
86
- print(f"Tool: process_youtube")
87
- url_match = re.search(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)', question)
88
- if not url_match: return "No YouTube URL found."
89
  try:
90
- video_id = url_match.group(1)
91
- # This is the correct, static method call for the library
92
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
93
- return "YouTube Transcript: " + " ".join([item['text'] for item in transcript_list])
94
- except Exception as e:
95
- return f"Error processing YouTube transcript: {e}"
96
 
97
- # --- Main Orchestrator Logic ---
98
- def __call__(self, task: Dict[str, Any]) -> str:
99
- question = task.get("question")
100
- print(f"\n--- New Task ---\nQuestion: {question[:150]}...")
101
-
102
- context = ""
103
- # 1. Check for a file URL first
104
- file_url = task.get("files", [None])[0]
105
- if file_url:
106
- context = self.analyze_file(file_url)
107
- # 2. Check for a YouTube URL in the question text
108
- elif "youtube.com" in question or "youtu.be" in question:
109
- context = self.process_youtube(question)
110
- # 3. Default to web search for everything else
111
- else:
112
- context = self.search_and_browse(query=question)
113
-
114
- # 4. Final step: Use the gathered context to generate an answer
115
- final_prompt = f"Based ONLY on the following context, provide a direct and concise answer to the user's question. Do not use any other information.\n\nContext:\n{context}\n\nQuestion:\n{question}"
116
  try:
117
- final_answer = self.llm.invoke(final_prompt).content
118
- print(f"Final Answer: {final_answer}")
119
- return final_answer
120
- except Exception as e:
121
- return f"Error during final answer generation: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  # --- Your Original, Correct Submission and Gradio Code ---
124
  def run_and_submit_all(profile: gr.OAuthProfile | None):
125
  space_id = os.getenv("SPACE_ID")
126
-
127
- if profile and profile.username:
128
- username = f"{profile.username}"
129
- print(f"User logged in: {username}")
130
- else:
131
- print("User not logged in.")
132
  return "Please Login to Hugging Face with the button.", None
133
-
134
- api_url, questions_url, submit_url = DEFAULT_API_URL, f"{DEFAULT_API_URL}/questions", f"{DEFAULT_API_URL}/submit"
135
 
136
  try: agent = BasicAgent()
137
- except Exception as e: return f"Error initializing agent: {e}\n\n{traceback.format_exc()}", None
138
 
139
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
140
 
141
  try:
142
- response = requests.get(questions_url, timeout=20)
143
  response.raise_for_status()
144
  questions_data = response.json()
145
  except Exception as e: return f"Error fetching questions: {e}", None
146
 
147
  results_log, answers_payload = [], []
148
  for item in questions_data:
149
- task_id, question_text = item.get("task_id"), item.get("question")
150
- if not task_id or question_text is None: continue
151
  try:
152
  submitted_answer = agent(item)
153
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
154
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
155
  except Exception as e:
156
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {traceback.format_exc()}"})
 
157
 
158
- if not answers_payload: return "Agent did not produce answers.", pd.DataFrame(results_log)
159
-
160
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
161
 
162
  try:
163
- response = requests.post(submit_url, json=submission_data, timeout=60)
164
  response.raise_for_status()
165
  result_data = response.json()
166
- final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
167
- f"Overall Score: {result_data.get('score', 'N/A')}% "
168
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)")
169
  return final_status, pd.DataFrame(results_log)
170
- except Exception as e: return f"Submission Failed: {e}", pd.DataFrame(results_log)
 
171
 
172
- # This is your original, correct interface structure that works.
173
  with gr.Blocks() as demo:
174
  gr.Markdown("# Agent Evaluation Runner")
175
  gr.LoginButton()
176
  run_button = gr.Button("Run Evaluation & Submit All Answers")
177
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
178
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
179
- # The click event with NO 'inputs' argument.
180
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
181
 
182
  if __name__ == "__main__":
 
22
  # --- Constants ---
  # Base URL of the scoring service. run_and_submit_all() appends "/questions"
  # to fetch the task list and "/submit" to post the answers.
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
+ # --- Agent Definition: A True ReAct Agent ---
26
  class BasicAgent:
27
  def __init__(self):
28
+ print("Initializing ReAct Agent...")
29
  try:
30
  self.llm = AzureChatOpenAI(
31
  azure_endpoint="https://dsap.openai.azure.com/",
 
36
  )
37
  except KeyError:
38
  raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
39
+
40
+ self.tools = {
41
+ "search": self.search,
42
+ "browse": self.browse,
43
+ "python": self.python,
44
+ "youtube_transcript": self.youtube_transcript,
45
+ }
46
+ self.system_prompt = self._create_system_prompt()
47
  print("Agent initialized.")
48
 
49
def _create_system_prompt(self) -> str:
    """Build the system prompt that drives the ReAct loop.

    The prompt lists every tool registered in ``self.tools`` (name plus the
    tool's docstring) and then spells out the Thought / Action /
    Action Input / Observation / Final Answer protocol.

    Returns:
        The complete prompt text.
    """
    # inspect.getdoc() returns None when a callable has no docstring (or when
    # docstrings are stripped with -OO); fall back to a neutral description so
    # the model never sees the literal text "None".
    tool_docs = "\n".join(
        f"- {name}: {inspect.getdoc(func) or 'No description available.'}"
        for name, func in self.tools.items()
    )
    tool_names = ", ".join(self.tools)
    return f"""
You are a helpful assistant that answers questions by thinking step-by-step and using the tools provided.

You have access to the following tools:
{tool_docs}

Follow this process:
1. **Thought:** Analyze the user's question and decide what to do.
2. **Action:** Choose ONE tool from the list: {tool_names}.
3. **Action Input:** Provide the input for the chosen tool.
4. **Observation:** After you use a tool, you will see its output.
5. Repeat this Thought/Action/Action Input/Observation cycle until you are certain you have the final answer.
6. **Thought:** Conclude that you have the final answer.
7. **Final Answer:** Provide the final answer to the user.

**Important Guidelines:**
- For web-based questions, `search` first to get URLs, then `browse` the most relevant URL.
- If a question provides a file URL, you MUST use the `python` tool to download and analyze it. Example: `requests.get(url).content`.
- If you can answer the question directly without tools, do so immediately with a "Final Answer".

Begin!
"""
74
+
75
  # --- Tool Definitions ---
76
def search(self, query: str) -> str:
    """Searches the web with DuckDuckGo to find relevant URLs."""
    # The docstring above is read with inspect.getdoc() and pasted into the
    # system prompt, so it is runtime text and is kept verbatim.
    print(f"Tool: search, Query: {query}")
    try:
        with DDGS() as client:
            # Materialise at most five hits while the session is still open.
            hits = list(client.text(query, max_results=5))
    except Exception as err:
        # Any failure is reported back to the model as plain text.
        return f"Error during search: {err}"
    if not hits:
        return "No results found."
    # The model receives the raw repr of the result records.
    return str(hits)
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
def browse(self, url: str) -> str:
    """Gets the full, clean text content of a single webpage."""
    # The docstring above is read with inspect.getdoc() and pasted into the
    # system prompt, so it is runtime text and is kept verbatim. Note that the
    # returned text is capped at 4000 characters to limit context size.
    print(f"Tool: browse, URL: {url}")
    try:
        response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
        # Report 4xx/5xx responses as errors instead of handing the model the
        # text of an error page as if it were the requested content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Script and style bodies are not page text; drop them before extraction.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        # A separator keeps text from adjacent elements from being glued together;
        # split/join then collapses every whitespace run to a single space.
        page_text = ' '.join(soup.get_text(separator=' ').split())
        return page_text[:4000]  # Limit context size
    except Exception as e:
        # Any failure is reported back to the model as plain text.
        return f"Error browsing {url}: {e}"
 
 
 
 
 
 
 
 
 
 
 
93
 
94
def python(self, code: str) -> str:
    """Executes Python code to analyze data or files. ALWAYS use this for file URLs."""
    # The docstring above is read with inspect.getdoc() and pasted into the
    # system prompt, so it is runtime text and is kept verbatim.
    #
    # SECURITY NOTE(review): this exec()s model-generated code in-process with
    # full builtins and network access. The globals dict below limits which
    # modules are pre-imported; it is NOT a sandbox.
    print(f"Tool: python, Code: {code}")

    # Remove a surrounding markdown fence, then a leading language-tag line.
    # The tag is only removed at the very start: a blanket
    # replace("python\n", "") would also corrupt code in which a line merely
    # ends with the word "python".
    code = code.strip().strip("`")
    code = re.sub(r"\A(?:python3?|py)[ \t]*\r?\n", "", code, flags=re.IGNORECASE).strip()

    # Expose only the helper modules that were actually imported by this file,
    # so one missing optional dependency does not make every execution fail
    # with a NameError before the code even runs.
    module_scope = globals()
    exec_globals = {
        name: module_scope[name]
        for name in ('pd', 'np', 'requests', 'io', 'librosa', 'sf', 'openpyxl')
        if name in module_scope
    }

    buffer = io.StringIO()
    try:
        with redirect_stdout(buffer):
            exec(code, exec_globals)
        return f"Execution successful. Output:\n{buffer.getvalue()}"
    except (Exception, SystemExit):
        # SystemExit is included so that generated code calling exit() is
        # reported as a failure instead of unwinding the whole agent run.
        return f"Execution failed. Error:\n{traceback.format_exc()}"
 
105
 
106
def youtube_transcript(self, url: str) -> str:
    """Fetches the transcript of a YouTube video."""
    # The docstring above is read with inspect.getdoc() and pasted into the
    # system prompt, so it is runtime text and is kept verbatim.
    print(f"Tool: youtube_transcript, URL: {url}")

    # Accept watch URLs (?v=ID) as well as short, shorts, embed and live links.
    # Matching only "v=" made every other URL form fail with an opaque
    # "'NoneType' object has no attribute 'group'" message.
    id_match = re.search(r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([\w-]+)", url)
    if id_match is None:
        return f"Error processing YouTube transcript: no video ID found in '{url}'"

    try:
        segments = YouTubeTranscriptApi.get_transcript(id_match.group(1))
        # Each segment is a mapping whose 'text' entry holds one caption line.
        return " ".join(item['text'] for item in segments)
    except Exception as e:
        # Any failure is reported back to the model as plain text.
        return f"Error processing YouTube transcript: {e}"
113
+
114
+ # --- Main ReAct Loop ---
115
def __call__(self, task: Dict[str, Any]) -> str:
    """Answer one task by running a bounded ReAct loop.

    Args:
        task: Task record. ``task["question"]`` is the question text; if
            ``task["files"]`` is a non-empty sequence, its first entry is
            appended to the question as the file location.

    Returns:
        The text following "Final Answer:" once the model produces one; the
        model's raw reply if it contains neither a final answer nor a
        well-formed action; or a fixed failure message after 8 steps.

    Raises:
        Whatever ``self.llm.invoke`` raises; tool errors are caught and fed
        back to the model as observations instead.
    """
    # An explicit None question must not break the string concatenation below.
    question = task.get("question") or ""
    files = task.get("files")
    if files:
        question += f"\nFile available at: {files[0]}"

    prompt = f"{self.system_prompt}\nQuestion: {question}\nThought:"
    history = ""

    for i in range(8):  # Max 8 steps
        print(f"--- Step {i+1} ---")
        llm_response = self.llm.invoke(prompt + history).content.strip()

        # Only this loop can supply an observation. Anything the model writes
        # from "Observation:" onwards is invented, so cut it off before it can
        # be mistaken for a tool result or a final answer.
        llm_response = re.split(
            r"\n[\s\d.*]*Observation:", llm_response, maxsplit=1
        )[0].strip()
        history += llm_response

        # The patterns tolerate the markdown emphasis used in the system
        # prompt itself ("**Final Answer:** ...", "**Action:** ...").
        final_answer_match = re.search(r"Final Answer:\**\s*(.*)", llm_response, re.DOTALL)
        if final_answer_match:
            final_answer = final_answer_match.group(1).strip()
            print(f"Final Answer Found: {final_answer}")
            return final_answer

        action_match = re.search(
            r"Action:\**\s*(\w+)[\s\d.*]*Action Input:\**\s*(.*)",
            llm_response,
            re.DOTALL,
        )
        if not action_match:
            # If the agent doesn't format an action, just return its last thought.
            return llm_response

        tool_name = action_match.group(1).strip()
        tool_input = action_match.group(2).strip(' \n"`')
        if tool_name in self.tools:
            try:
                tool_result = self.tools[tool_name](tool_input)
            except Exception as e:
                tool_result = f"Error calling tool {tool_name}: {e}"
        else:
            tool_result = f"Error: Unknown tool '{tool_name}'."

        history += f"\nObservation: {tool_result}\nThought:"

    return "Agent could not reach a final answer after multiple steps."
153
 
154
  # --- Your Original, Correct Submission and Gradio Code ---
155
  def run_and_submit_all(profile: gr.OAuthProfile | None):
156
  space_id = os.getenv("SPACE_ID")
157
+ if not (profile and profile.username):
 
 
 
 
 
158
  return "Please Login to Hugging Face with the button.", None
159
+ username = profile.username
160
+ print(f"User logged in: {username}")
161
 
162
  try: agent = BasicAgent()
163
+ except Exception as e: return f"Error initializing agent: {e}", None
164
 
165
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
166
 
167
  try:
168
+ response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
169
  response.raise_for_status()
170
  questions_data = response.json()
171
  except Exception as e: return f"Error fetching questions: {e}", None
172
 
173
  results_log, answers_payload = [], []
174
  for item in questions_data:
175
+ if not (task_id := item.get("task_id")): continue
 
176
  try:
177
  submitted_answer = agent(item)
178
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
179
  except Exception as e:
180
+ submitted_answer = f"AGENT ERROR: {traceback.format_exc()}"
181
+ results_log.append({"Task ID": task_id, "Question": item.get("question"), "Submitted Answer": submitted_answer})
182
 
183
+ submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
 
 
184
 
185
  try:
186
+ response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
187
  response.raise_for_status()
188
  result_data = response.json()
189
+ final_status = (f"Submission Successful! Score: {result_data.get('score', 'N/A')}%")
 
 
190
  return final_status, pd.DataFrame(results_log)
191
+ except Exception as e:
192
+ return f"Submission Failed: {e}", pd.DataFrame(results_log)
193
 
 
194
  # Gradio UI: a login button, one button that runs the whole evaluation, a
  # status box and a table showing the per-question results.
  with gr.Blocks() as demo:
      gr.Markdown("# Agent Evaluation Runner")
      gr.LoginButton()
      run_button = gr.Button("Run Evaluation & Submit All Answers")
      # Receives the first element of run_and_submit_all's return value.
      status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
      # Receives the second element (a DataFrame, or None on early exit).
      results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
      # No 'inputs' argument: the callback's only parameter is annotated as
      # gr.OAuthProfile | None and is presumably supplied by Gradio from the
      # login state rather than from a component (confirm against Gradio docs).
      run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
202
 
203
  if __name__ == "__main__":