MohamedAliAmiraa committed on
Commit
bc9c69f
·
verified ·
1 Parent(s): 75b066d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -91
app.py CHANGED
@@ -12,7 +12,7 @@ import traceback
12
  # --- Core Libraries ---
13
  try:
14
  from langchain_openai import AzureChatOpenAI
15
- from ddgs import DDGS
16
  from bs4 import BeautifulSoup
17
  from youtube_transcript_api import YouTubeTranscriptApi
18
  import openpyxl, librosa, soundfile as sf, numpy as np
@@ -22,10 +22,10 @@ except ImportError:
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
- # --- Agent Definition: A Smart Orchestrator + ReAct Agent ---
26
  class BasicAgent:
27
  def __init__(self):
28
- print("Initializing Hybrid Agent...")
29
  try:
30
  self.llm = AzureChatOpenAI(
31
  azure_endpoint="https://dsap.openai.azure.com/",
@@ -38,111 +38,172 @@ class BasicAgent:
38
  raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
39
 
40
  self.tools = {
41
- "search_and_browse": self.search_and_browse,
42
- "python_file_analyzer": self.python_file_analyzer,
 
 
43
  }
44
- self.react_system_prompt = self._create_react_prompt()
45
  print("Agent initialized.")
46
 
47
- def _create_react_prompt(self) -> str:
48
- """Return the fixed system prompt for the web-search ReAct loop: it tells the model to cycle through Thought, Action (`search_and_browse`), Action Input and Observation, and to finish with a Final Answer."""
49
- return """
50
- You are a helpful assistant that answers questions by searching the web.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  **Process:**
52
- 1. **Thought:** Analyze the user's question and decide what to search for.
53
- 2. **Action:** Use the `search_and_browse` tool.
54
- 3. **Action Input:** Provide a concise search query.
55
- 4. **Observation:** You will see the content of the top search results.
56
- 5. **Thought:** Analyze the search results. If you have enough information, provide the final answer. If not, refine your search and use the `search_and_browse` tool again.
57
- 6. **Final Answer:** Provide the final, direct answer to the user's question.
 
 
 
 
 
 
 
 
58
  Begin!
59
  """
60
 
61
  # --- Tool Definitions ---
62
- def search_and_browse(self, query: str) -> str:
63
- """Searches the web and browses the top 3 results to gather context."""
64
- print(f"Tool: search_and_browse, Query: {query}")
65
- context = ""
66
  try:
67
  with DDGS() as ddgs:
68
- results = [r for r in ddgs.text(query, max_results=3)]
69
- if not results: return f"No results found for '{query}'."
70
-
71
- for result in results:
72
- try:
73
- url = result['href']
74
- response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
75
- soup = BeautifulSoup(response.content, 'html.parser')
76
- context += f"Source: {url}\nContent: {' '.join(soup.get_text().split())[:1500]}\n\n"
77
- except Exception as e:
78
- context += f"Could not browse {url}: {e}\n\n"
79
- return context
80
- except Exception as e:
81
- return f"Error during search: {e}"
82
-
83
- def python_file_analyzer(self, file_url: str) -> str:
84
- """
85
- Downloads a file from a URL and analyzes its content using Python.
86
- This tool is called directly by the orchestrator, not by the LLM.
87
- """
88
- print(f"Tool: python_file_analyzer, URL: {file_url}")
89
-
90
- # Handle non-downloadable file types first
91
- if file_url.endswith(('.png', '.jpg', '.jpeg', '.gif')):
92
- return "Limitation: I cannot analyze image content. Please describe the image."
93
- if file_url.endswith(('.mp3', '.wav')):
94
- return "Limitation: I cannot reliably transcribe audio files. Please provide a transcript."
95
 
96
- # For downloadable files, use Python
 
97
  try:
98
- response = requests.get(file_url)
99
- response.raise_for_status()
100
-
101
- if file_url.endswith('.xlsx'):
102
- df = pd.read_excel(io.BytesIO(response.content))
103
- return f"Successfully read the Excel file. Here is its content:\n\n{df.to_string()}"
104
- elif file_url.endswith('.py'):
105
- return f"Successfully read the Python file. Here is its content:\n\n{response.text}"
106
- else:
107
- return "Unsupported file type."
108
- except Exception as e:
109
- return f"Failed to download or process the file. Error: {e}"
 
 
 
 
 
 
 
 
 
 
110
 
111
- # --- Main Orchestrator Logic ---
112
  def __call__(self, task: Dict[str, Any]) -> str:
113
- question = task.get("question", "")
114
- print(f"\n--- New Task ---\nQuestion: {question[:150]}...")
115
-
116
  file_url = task.get("files", [None])[0]
117
-
118
- # STRATEGY 1: Deterministic File Handling (Orchestrator)
 
119
  if file_url:
120
- # The orchestrator calls the tool directly, removing LLM unreliability
121
- context = self.python_file_analyzer(file_url)
122
- final_prompt = f"Based ONLY on the following file content, provide a direct and concise answer to the user's question.\n\nFile Content:\n{context}\n\nUser Question:\n{question}"
123
- return self.llm.invoke(final_prompt).content
124
-
125
- # STRATEGY 2: Flexible Web Search (ReAct Loop)
126
- else:
127
- prompt = f"{self.react_system_prompt}\nQuestion: {question}\nThought:"
128
- history = ""
129
- for i in range(5): # Max 5 steps
130
- full_prompt = prompt + history
131
- llm_response = self.llm.invoke(full_prompt).content.strip()
132
-
133
- final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_response, re.DOTALL)
134
- if final_answer_match:
135
- return final_answer_match.group(1).strip()
136
-
137
- # For web search, we assume the only tool is search_and_browse
138
- action_match = re.search(r"Action Input:\s*(.*)", llm_response)
139
- if action_match:
140
- query = action_match.group(1).strip()
141
- observation = self.search_and_browse(query)
142
- history += f"\n{llm_response}\nObservation: {observation}\nThought:"
 
 
 
 
 
 
143
  else:
144
- return llm_response # Fallback
145
- return "Agent could not reach a final answer after multiple web searches."
 
 
 
 
 
 
146
 
147
  # --- Your Original, Correct Submission and Gradio Code ---
148
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
12
  # --- Core Libraries ---
13
  try:
14
  from langchain_openai import AzureChatOpenAI
15
+ from ddgs import DDGS # Use the new, correct library
16
  from bs4 import BeautifulSoup
17
  from youtube_transcript_api import YouTubeTranscriptApi
18
  import openpyxl, librosa, soundfile as sf, numpy as np
 
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
+ # --- Agent Definition: A Professional ReAct Agent with Dynamic Examples ---
26
  class BasicAgent:
27
  def __init__(self):
28
+ print("Initializing ReAct Agent...")
29
  try:
30
  self.llm = AzureChatOpenAI(
31
  azure_endpoint="https://dsap.openai.azure.com/",
 
38
  raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
39
 
40
  self.tools = {
41
+ "search": self.search,
42
+ "browse": self.browse,
43
+ "python": self.python,
44
+ "youtube_transcript": self.youtube_transcript,
45
  }
 
46
  print("Agent initialized.")
47
 
48
+ def _create_system_prompt(self, file_url=None) -> str:
49
+ """Creates the master prompt, dynamically injecting a file-handling example if a URL is provided."""
50
+ tool_docs = "\n".join([f"- {name}: {inspect.getdoc(func)}" for name, func in self.tools.items()])
51
+
52
+ web_search_example = """
53
+ **Example: Web Search**
54
+ Question: Who was the prime minister of the UK in 1999?
55
+ Thought: I need to find out who was the prime minister of the UK in 1999. I will use the search tool.
56
+ Action: search
57
+ Action Input: prime minister of UK 1999
58
+ Observation: [{{'title': 'Tony Blair - Wikipedia', 'href': 'https://en.wikipedia.org/wiki/Tony_Blair', ...}}]
59
+ Thought: The search results point to Tony Blair. The first link looks promising. I will browse the Wikipedia page to confirm.
60
+ Action: browse
61
+ Action Input: https://en.wikipedia.org/wiki/Tony_Blair
62
+ Observation: [Page content confirming Tony Blair was Prime Minister from 1997 to 2007]
63
+ Thought: I have confirmed the answer from a reliable source.
64
+ Final Answer: Tony Blair"""
65
+
66
+ file_analysis_example = ""
67
+ if file_url:
68
+ code_snippet = "# This is a placeholder, will be replaced by a specific file handler\n"
69
+ if file_url.endswith(('.xlsx', '.csv')):
70
+ code_snippet = f"""
71
+ import pandas as pd
72
+ import requests
73
+ import io
74
+ # The user's file is at this URL, which MUST be used.
75
+ url = '{file_url}'
76
+ response = requests.get(url)
77
+ df = pd.read_excel(io.BytesIO(response.content))
78
+ # Now, I must analyze the dataframe `df` to answer the question.
79
+ # For example, to see the first few rows, I can print(df.head()).
80
+ # To calculate total sales, I would use print(df['Sales'].sum()).
81
+ print(df.to_string())
82
+ """
83
+ elif file_url.endswith('.py'):
84
+ code_snippet = f"""
85
+ import requests
86
+ # The user's Python code file is at this URL, which MUST be used.
87
+ url = '{file_url}'
88
+ response = requests.get(url)
89
+ python_code_to_run = response.text
90
+ # Now, I must execute this code to find the output.
91
+ # I will use another python action to run the code.
92
+ print("Code downloaded. Ready for execution in the next step.")
93
+ """
94
+
95
+ if code_snippet:
96
+ file_analysis_example = f"""
97
+ **Example: File Analysis (Use this exact code pattern)**
98
+ Question: Analyze the attached file. File available at: {file_url}
99
+ Thought: The user has provided a file. I must use the `python` tool to download and analyze it using the exact URL from the question. The following code pattern is perfect for this. I will copy it exactly.
100
+ Action: python
101
+ Action Input:
102
+ {code_snippet}
103
+ Observation: [The output of the python script]
104
+ Thought: I have analyzed the file content. Now I can answer the user's question based on the script's output.
105
+ Final Answer: [Answer based on the script's output]"""
106
+
107
+ return f"""
108
+ You are a helpful assistant that answers questions by thinking step-by-step and using the tools provided.
109
+
110
  **Process:**
111
+ 1. **Thought:** Analyze the user's question and create a plan. If you see an example below that matches your plan, follow it exactly.
112
+ 2. **Action:** Choose ONE tool from the list: {", ".join(self.tools.keys())}.
113
+ 3. **Action Input:** Provide the input for the chosen tool. This can be multi-line.
114
+ 4. **Observation:** After you use a tool, you will see its output.
115
+ 5. Repeat this Thought/Action/Action Input/Observation cycle until you are certain you have the final answer.
116
+ 6. **Thought:** Conclude that you have the final answer.
117
+ 7. **Final Answer:** Provide the final, direct answer to the user's question.
118
+
119
+ You have access to the following tools:
120
+ {tool_docs}
121
+
122
+ {web_search_example}
123
+ {file_analysis_example}
124
+
125
  Begin!
126
  """
127
 
128
  # --- Tool Definitions ---
129
+ def search(self, query: str) -> str:
130
+ """Searches the web with DuckDuckGo to find relevant URLs and information."""
 
 
131
  try:
132
  with DDGS() as ddgs:
133
+ return str([r for r in ddgs.text(query, max_results=4)])
134
+ except Exception as e: return f"Error during search: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ def browse(self, url: str) -> str:
137
+ """Gets the full, clean text content of a single webpage URL."""
138
  try:
139
+ response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
140
+ soup = BeautifulSoup(response.content, 'html.parser')
141
+ return ' '.join(soup.get_text().split())[:4000]
142
+ except Exception as e: return f"Error browsing {url}: {e}"
143
+
144
+ def python(self, code: str) -> str:
145
+ """Executes Python code to analyze data or files. Use `requests` to download files from URLs."""
146
+ code = code.strip().strip("`").replace("python\n", "").strip()
147
+ buffer = io.StringIO()
148
+ try:
149
+ safe_globals = {'pd': pd, 'np': np, 'requests': requests, 'io': io, 'librosa': librosa, 'sf': sf, 'openpyxl': openpyxl}
150
+ with redirect_stdout(buffer):
151
+ exec(code, safe_globals)
152
+ return f"Execution successful. Output:\n{buffer.getvalue()}"
153
+ except Exception as e: return f"Execution failed. Error:\n{traceback.format_exc()}"
154
+
155
+ def youtube_transcript(self, url: str) -> str:
156
+ """Fetches the full transcript of a YouTube video from its URL."""
157
+ try:
158
+ video_id = re.search(r"(?<=v=)[\w-]+", url).group(0)
159
+ return " ".join([item['text'] for item in YouTubeTranscriptApi.get_transcript(video_id)])
160
+ except Exception as e: return f"Error fetching transcript: {e}"
161
 
162
+ # --- Main ReAct Loop ---
163
  def __call__(self, task: Dict[str, Any]) -> str:
 
 
 
164
  file_url = task.get("files", [None])[0]
165
+ system_prompt = self._create_system_prompt(file_url=file_url)
166
+
167
+ question = task.get("question", "")
168
  if file_url:
169
+ question += f"\nFile available at: {file_url}"
170
+
171
+ # Initialize the history correctly for the ReAct loop
172
+ history = f"{system_prompt}\nQuestion: {question}\nThought:"
173
+
174
+ for i in range(8):
175
+ print(f"--- Step {i+1} ---")
176
+
177
+ # The full prompt for this step is the entire history
178
+ llm_response = self.llm.invoke(history).content.strip()
179
+
180
+ # Append the agent's reasoning to the history
181
+ history += f"\n{llm_response}"
182
+
183
+ final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_response, re.DOTALL)
184
+ if final_answer_match:
185
+ answer = final_answer_match.group(1).strip()
186
+ print(f"Final Answer Found: {answer}")
187
+ return answer
188
+
189
+ action_match = re.search(r"Action:\s*(\w+)\s*Action Input:((.|\n)*)", llm_response)
190
+ if action_match:
191
+ tool_name = action_match.group(1).strip()
192
+ tool_input = action_match.group(2).strip(' \n"`')
193
+ if tool_name in self.tools:
194
+ try:
195
+ tool_result = self.tools[tool_name](tool_input)
196
+ except Exception as e:
197
+ tool_result = f"Error calling tool {tool_name}: {e}"
198
  else:
199
+ tool_result = f"Error: Unknown tool '{tool_name}'."
200
+
201
+ # Append the observation to the history for the next step
202
+ history += f"\nObservation: {tool_result}\nThought:"
203
+ else:
204
+ return llm_response
205
+
206
+ return "Agent could not reach a final answer after 8 steps."
207
 
208
  # --- Your Original, Correct Submission and Gradio Code ---
209
  def run_and_submit_all(profile: gr.OAuthProfile | None):