MohamedAliAmiraa committed on
Commit
75b066d
·
verified ·
1 Parent(s): 7c505b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -137
app.py CHANGED
@@ -12,7 +12,7 @@ import traceback
12
  # --- Core Libraries ---
13
  try:
14
  from langchain_openai import AzureChatOpenAI
15
- from ddgs import DDGS # Use the new, correct library
16
  from bs4 import BeautifulSoup
17
  from youtube_transcript_api import YouTubeTranscriptApi
18
  import openpyxl, librosa, soundfile as sf, numpy as np
@@ -22,10 +22,10 @@ except ImportError:
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
- # --- Agent Definition: A Professional ReAct Agent with Dynamic Examples ---
26
  class BasicAgent:
27
  def __init__(self):
28
- print("Initializing ReAct Agent...")
29
  try:
30
  self.llm = AzureChatOpenAI(
31
  azure_endpoint="https://dsap.openai.azure.com/",
@@ -38,156 +38,111 @@ class BasicAgent:
38
  raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
39
 
40
  self.tools = {
41
- "search": self.search,
42
- "browse": self.browse,
43
- "python": self.python,
44
- "youtube_transcript": self.youtube_transcript,
45
  }
 
46
  print("Agent initialized.")
47
 
48
def _create_system_prompt(self, file_url=None) -> str:
    """Build the ReAct system prompt, adding a file-handling example when a file URL is given.

    Args:
        file_url: Optional URL of a file attached to the task. For URLs ending
            in ``.xlsx``, ``.csv`` or ``.py`` a ready-to-copy ``python`` tool
            example that downloads the file is appended to the prompt.

    Returns:
        The full prompt text: the process rules, one line per registered tool
        (name plus the tool's docstring), and the worked examples.
    """
    tool_docs = "\n".join(
        f"- {name}: {inspect.getdoc(func)}" for name, func in self.tools.items()
    )

    # This text is inserted into the final prompt as a value, so braces are
    # written once; doubled braces would reach the model literally as "{{...}}".
    web_search_example = "\n".join([
        "",
        "**Example: Web Search**",
        "Question: Who was the prime minister of the UK in 1999?",
        "Thought: I need to find out who was the prime minister of the UK in 1999. I will use the search tool.",
        "Action: search",
        "Action Input: prime minister of UK 1999",
        "Observation: [{'title': 'Tony Blair - Wikipedia', 'href': 'https://en.wikipedia.org/wiki/Tony_Blair', ...}]",
        "Thought: The search results point to Tony Blair. I will browse the Wikipedia page to confirm.",
        "Action: browse",
        "Action Input: https://en.wikipedia.org/wiki/Tony_Blair",
        "Observation: [Page content confirming Tony Blair was Prime Minister from 1997 to 2007]",
        "Thought: I have confirmed the answer from a reliable source.",
        "Final Answer: Tony Blair",
    ])

    snippet_lines = []
    if file_url:
        # repr() yields a correctly quoted Python literal even when the URL
        # itself contains quote characters.
        url_assignment = f"url = {file_url!r}"
        if file_url.endswith((".xlsx", ".csv")):
            # CSV is plain text and cannot be parsed by read_excel.
            reader = "pd.read_csv" if file_url.endswith(".csv") else "pd.read_excel"
            snippet_lines = [
                "import pandas as pd",
                "import requests",
                "import io",
                url_assignment,
                "response = requests.get(url)",
                f"df = {reader}(io.BytesIO(response.content))",
                "print(df.to_string())",
            ]
        elif file_url.endswith(".py"):
            snippet_lines = [
                "import requests",
                url_assignment,
                "response = requests.get(url)",
                "python_code_to_run = response.text",
                "print(python_code_to_run)",
            ]

    file_analysis_example = ""
    if snippet_lines:
        code_snippet = "\n" + "\n".join(snippet_lines) + "\n"
        file_analysis_example = "\n".join([
            "",
            "**Example: File Analysis (Use this exact code pattern)**",
            f"Question: Analyze the attached file. File available at: {file_url}",
            "Thought: The user has provided a file. I must use the `python` tool to download and analyze it using the exact URL from the question. The following code pattern is perfect for this. I will copy it exactly.",
            "Action: python",
            "Action Input:",
            code_snippet,
            "Observation: [The output of the python script]",
            "Thought: I have analyzed the file content. Now I can answer the user's question based on the script's output.",
            "Final Answer: [Answer based on the script's output]",
        ])

    return "\n".join([
        "",
        "You are a helpful assistant that answers questions by thinking step-by-step and using the tools provided.",
        "**Process:**",
        "1. **Thought:** Analyze the user's question and create a plan. If you see an example below that matches your plan, follow it exactly.",
        f"2. **Action:** Choose ONE tool from the list: {', '.join(self.tools)}.",
        "3. **Action Input:** Provide the input for the chosen tool. This can be multi-line.",
        "4. **Observation:** After you use a tool, you will see its output.",
        "5. Repeat this Thought/Action/Action Input/Observation cycle until you are certain you have the final answer.",
        "6. **Thought:** Conclude that you have the final answer.",
        "7. **Final Answer:** Provide the final, direct answer to the user's question.",
        "",
        "You have access to the following tools:",
        tool_docs,
        "",
        web_search_example,
        file_analysis_example,
        "",
        "Begin!",
        "",
    ])
119
 
120
  # --- Tool Definitions ---
121
def search(self, query: str) -> str:
    """Searches the web with DuckDuckGo to find relevant URLs and information."""
    # NOTE: the docstring above is shown to the LLM as this tool's description
    # (the system prompt is built with inspect.getdoc), so it stays one line.
    #
    # query: free-text search string.
    # Returns the string form of a list of up to 4 result records, or an
    # "Error during search: ..." message; failures are reported as text so the
    # agent loop can pass them back to the model.
    if not query or not query.strip():
        # A blank query cannot produce useful hits; skip the network call.
        return "Error during search: empty query."
    try:
        with DDGS() as ddgs:
            return str(list(ddgs.text(query, max_results=4)))
    except Exception as e:
        return f"Error during search: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
def browse(self, url: str) -> str:
    """Gets the full, clean text content of a single webpage URL."""
    # NOTE: the docstring above is shown to the LLM as this tool's description
    # (the system prompt is built with inspect.getdoc), so it stays one line.
    #
    # url: address of the page to fetch.
    # Returns the page's visible text with whitespace collapsed, truncated to
    # 4000 characters, or an "Error browsing <url>: ..." message.
    if not url or not url.strip():
        return f"Error browsing {url}: empty URL."
    try:
        response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
        # Without this, the body of a 404/500 error page would be handed to the
        # model as if it were the requested content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        collapsed = ' '.join(soup.get_text().split())
        return collapsed[:4000]
    except Exception as e:
        return f"Error browsing {url}: {e}"
135
-
136
def python(self, code: str) -> str:
    """Executes Python code to analyze data or files. Use `requests` to download files from URLs."""
    # NOTE: the docstring above is shown to the LLM as this tool's description
    # (the system prompt is built with inspect.getdoc), so it stays one line.
    #
    # code: Python source, optionally wrapped in a Markdown ``` fence.
    # Returns "Execution successful. Output:\n<stdout>" or
    # "Execution failed. Error:\n<traceback>".

    # Drop surrounding whitespace and the backticks of a Markdown code fence.
    source = code.strip().strip("`")
    # Remove only a *leading* "python" language tag. A blanket
    # str.replace("python\n", "") would also eat the end of any later line that
    # happens to finish with "python" (for example a comment), merging it with
    # the next statement.
    if source.startswith("python\n"):
        source = source[len("python\n"):]
    source = source.strip()
    if not source:
        return "Execution failed. Error:\nNo code provided."

    buffer = io.StringIO()
    try:
        # SECURITY: exec() runs model-generated code with the full privileges of
        # this process. This dict only pre-loads convenient names; it is not a
        # sandbox (exec inserts __builtins__ into it automatically).
        namespace = {'pd': pd, 'np': np, 'requests': requests, 'io': io, 'librosa': librosa, 'sf': sf, 'openpyxl': openpyxl}
        with redirect_stdout(buffer):
            exec(source, namespace)
        return f"Execution successful. Output:\n{buffer.getvalue()}"
    except Exception:
        return f"Execution failed. Error:\n{traceback.format_exc()}"
146
-
147
def youtube_transcript(self, url: str) -> str:
    """Fetches the full transcript of a YouTube video from its URL."""
    # NOTE: the docstring above is shown to the LLM as this tool's description
    # (the system prompt is built with inspect.getdoc), so it stays one line.
    #
    # url: a YouTube link in watch?v=<id>, youtu.be/<id>, /shorts/<id> or
    #      /embed/<id> form.
    # Returns the transcript segments joined by single spaces, or an
    # "Error fetching transcript: ..." message.
    id_match = re.search(r"(?:v=|youtu\.be/|/shorts/|/embed/)([\w-]+)", url or "")
    if id_match is None:
        # Say what went wrong instead of surfacing an AttributeError on None.
        return f"Error fetching transcript: could not find a video ID in {url!r}"
    try:
        segments = YouTubeTranscriptApi.get_transcript(id_match.group(1))
        return " ".join(item['text'] for item in segments)
    except Exception as e:
        return f"Error fetching transcript: {e}"
 
 
 
 
 
 
 
 
 
153
 
154
# --- Main ReAct Loop ---
def __call__(self, task: Dict[str, Any]) -> str:
    """Answer one task by running a Thought/Action/Observation loop.

    Args:
        task: Mapping with a "question" string and an optional "files" list
            whose first entry, if any, is the URL of an attached file.

    Returns:
        The text after "Final Answer:", the raw model reply when it contains
        neither a final answer nor an action, or a fixed message when no
        answer is reached within 8 steps.
    """
    # "files" may be missing, None or an empty list; only index when non-empty.
    files = task.get("files") or []
    file_url = files[0] if files else None
    system_prompt = self._create_system_prompt(file_url=file_url)

    question = task.get("question", "")
    if file_url:
        question += f"\nFile available at: {file_url}"

    prompt = f"{system_prompt}\nQuestion: {question}\nThought:"
    history = ""

    for _ in range(8):
        llm_response = self.llm.invoke(prompt + history).content.strip()

        # Models sometimes keep writing past their Action and invent the
        # tool's "Observation:" (and even a final answer). Keep only the part
        # up to the action so the real tool is run instead.
        head, found, _rest = llm_response.partition("\nObservation:")
        if found and "Action Input:" in head:
            llm_response = head.strip()
        history += f"\n{llm_response}"

        final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_response, re.DOTALL)
        if final_answer_match:
            return final_answer_match.group(1).strip()

        # DOTALL lets the action input span several lines.
        action_match = re.search(r"Action:\s*(\w+)\s*Action Input:(.*)", llm_response, re.DOTALL)
        if not action_match:
            # Neither an answer nor an action: hand back what the model said.
            return llm_response

        tool_name = action_match.group(1).strip()
        tool_input = action_match.group(2).strip(' \n"`')
        if tool_name in self.tools:
            try:
                tool_result = self.tools[tool_name](tool_input)
            except Exception as e:
                tool_result = f"Error calling tool {tool_name}: {e}"
        else:
            tool_result = f"Error: Unknown tool '{tool_name}'."
        history += f"\nObservation: {tool_result}\nThought:"

    return "Agent could not reach a final answer after 8 steps."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  # --- Your Original, Correct Submission and Gradio Code ---
193
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
12
  # --- Core Libraries ---
13
  try:
14
  from langchain_openai import AzureChatOpenAI
15
+ from ddgs import DDGS
16
  from bs4 import BeautifulSoup
17
  from youtube_transcript_api import YouTubeTranscriptApi
18
  import openpyxl, librosa, soundfile as sf, numpy as np
 
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
25
+ # --- Agent Definition: A Smart Orchestrator + ReAct Agent ---
26
  class BasicAgent:
27
  def __init__(self):
28
+ print("Initializing Hybrid Agent...")
29
  try:
30
  self.llm = AzureChatOpenAI(
31
  azure_endpoint="https://dsap.openai.azure.com/",
 
38
  raise KeyError("CRITICAL: 'AZURE_API_KEY' secret is missing.")
39
 
40
  self.tools = {
41
+ "search_and_browse": self.search_and_browse,
42
+ "python_file_analyzer": self.python_file_analyzer,
 
 
43
  }
44
+ self.react_system_prompt = self._create_react_prompt()
45
  print("Agent initialized.")
46
 
47
def _create_react_prompt(self) -> str:
    """Return the fixed system prompt used by the web-search ReAct loop."""
    # One entry per prompt line; the empty first and last entries reproduce the
    # newline right after the opening quotes and the one after "Begin!".
    prompt_lines = (
        "",
        "You are a helpful assistant that answers questions by searching the web.",
        "**Process:**",
        "1. **Thought:** Analyze the user's question and decide what to search for.",
        "2. **Action:** Use the `search_and_browse` tool.",
        "3. **Action Input:** Provide a concise search query.",
        "4. **Observation:** You will see the content of the top search results.",
        "5. **Thought:** Analyze the search results. If you have enough information, provide the final answer. If not, refine your search and use the `search_and_browse` tool again.",
        "6. **Final Answer:** Provide the final, direct answer to the user's question.",
        "Begin!",
        "",
    )
    return "\n".join(prompt_lines)
60
 
61
  # --- Tool Definitions ---
62
def search_and_browse(self, query: str) -> str:
    """Search the web and return text excerpts from the top three results.

    Args:
        query: Free-text search query sent to DuckDuckGo.

    Returns:
        One "Source: <url> / Content: <text>" section per fetched page (page
        text is whitespace-collapsed and cut to 1500 characters), a
        "Could not browse <url>" note for each page that failed, or a
        "No results found" / "Error during search" message.
    """
    print(f"Tool: search_and_browse, Query: {query}")
    if not query or not query.strip():
        # A blank query cannot produce useful hits; skip the network calls.
        return "Error during search: empty query."
    try:
        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=3))
        if not results:
            return f"No results found for '{query}'."

        sections = []
        for result in results:
            # Resolve the URL before the try block so the failure note below
            # always names the page that actually failed (never an unbound or
            # left-over value from a previous iteration).
            url = result.get('href')
            if not url:
                continue
            try:
                response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
                # Do not present a 404/500 error page as search context.
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')
                page_text = ' '.join(soup.get_text().split())[:1500]
                sections.append(f"Source: {url}\nContent: {page_text}\n\n")
            except Exception as e:
                sections.append(f"Could not browse {url}: {e}\n\n")
        return "".join(sections) or f"No results found for '{query}'."
    except Exception as e:
        return f"Error during search: {e}"
82
+
83
def python_file_analyzer(self, file_url: str) -> str:
    """Download a task attachment and return its content as text.

    This tool is called directly by the orchestrator, not by the LLM.

    Args:
        file_url: URL of the attached file. The type is decided from the file
            extension, ignoring letter case and any query string or fragment.

    Returns:
        The file content for ``.xlsx``, ``.csv`` and ``.py`` files, a
        "Limitation: ..." message for image and audio files,
        "Unsupported file type." for anything else, or a
        "Failed to download or process the file" message on error.
    """
    from urllib.parse import urlparse  # local import keeps this method self-contained

    print(f"Tool: python_file_analyzer, URL: {file_url}")

    lowered = file_url.lower()
    path = urlparse(lowered).path

    def has_ext(*extensions: str) -> bool:
        # Match on the URL path ("a.XLSX?download=1") and, as before, on the
        # whole URL string.
        return path.endswith(extensions) or lowered.endswith(extensions)

    # Handle file types we cannot analyze first
    if has_ext('.png', '.jpg', '.jpeg', '.gif'):
        return "Limitation: I cannot analyze image content. Please describe the image."
    if has_ext('.mp3', '.wav'):
        return "Limitation: I cannot reliably transcribe audio files. Please provide a transcript."
    if not has_ext('.xlsx', '.csv', '.py'):
        # Nothing we could parse, so do not download it at all.
        return "Unsupported file type."

    try:
        # A timeout keeps one unresponsive host from hanging the whole run.
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        if has_ext('.xlsx'):
            df = pd.read_excel(io.BytesIO(response.content))
            return f"Successfully read the Excel file. Here is its content:\n\n{df.to_string()}"
        if has_ext('.csv'):
            df = pd.read_csv(io.BytesIO(response.content))
            return f"Successfully read the CSV file. Here is its content:\n\n{df.to_string()}"
        return f"Successfully read the Python file. Here is its content:\n\n{response.text}"
    except Exception as e:
        return f"Failed to download or process the file. Error: {e}"
110
 
111
# --- Main Orchestrator Logic ---
def __call__(self, task: Dict[str, Any]) -> str:
    """Answer one task, using the attached file if present, otherwise web search.

    Args:
        task: Mapping with a "question" string and an optional "files" list
            whose first entry, if any, is the URL of an attached file.

    Returns:
        The model's answer. With a file: the stripped reply to a prompt built
        from the file content. Without: the text after "Final Answer:", the
        raw reply when it contains neither an answer nor a search query, or a
        fixed message when no answer is reached within 5 steps.
    """
    question = task.get("question", "")
    print(f"\n--- New Task ---\nQuestion: {question[:150]}...")

    # "files" may be missing, None or an empty list; only index when non-empty.
    files = task.get("files") or []
    file_url = files[0] if files else None

    # STRATEGY 1: Deterministic File Handling (Orchestrator)
    if file_url:
        # The orchestrator calls the tool directly, removing LLM unreliability
        context = self.python_file_analyzer(file_url)
        final_prompt = (
            "Based ONLY on the following file content, provide a direct and concise answer to the user's question."
            f"\n\nFile Content:\n{context}\n\nUser Question:\n{question}"
        )
        # Stripped so both strategies return answers without stray whitespace.
        return self.llm.invoke(final_prompt).content.strip()

    # STRATEGY 2: Flexible Web Search (ReAct Loop)
    prompt = f"{self.react_system_prompt}\nQuestion: {question}\nThought:"
    history = ""
    for _ in range(5):  # Max 5 steps
        llm_response = self.llm.invoke(prompt + history).content.strip()

        # Models sometimes keep writing past their Action Input and invent the
        # "Observation:" (and even a final answer). Keep only the part up to
        # the query so the real search is run instead.
        head, found, _rest = llm_response.partition("\nObservation:")
        if found and "Action Input:" in head:
            llm_response = head.strip()

        final_answer_match = re.search(r"Final Answer:\s*(.*)", llm_response, re.DOTALL)
        if final_answer_match:
            return final_answer_match.group(1).strip()

        # For web search, we assume the only tool is search_and_browse
        action_match = re.search(r"Action Input:\s*(.*)", llm_response)
        if not action_match:
            return llm_response  # Fallback
        query = action_match.group(1).strip()
        observation = self.search_and_browse(query)
        history += f"\n{llm_response}\nObservation: {observation}\nThought:"

    return "Agent could not reach a final answer after multiple web searches."
146
 
147
  # --- Your Original, Correct Submission and Gradio Code ---
148
  def run_and_submit_all(profile: gr.OAuthProfile | None):