DrekFretson committed on
Commit
bddb2ff
·
verified ·
1 Parent(s): 56fa902

Delete nodes.py

Browse files
Files changed (1) hide show
  1. nodes.py +0 -322
nodes.py DELETED
@@ -1,322 +0,0 @@
1
- from state import QuestionState
2
- from agent_model import model
3
- from utilities import parse_response
4
-
5
- from langchain_core.messages import HumanMessage
6
-
7
- import mistune
8
- from bs4 import BeautifulSoup
9
- from youtube_transcript_api import YouTubeTranscriptApi
10
- # from googlesearch import search, SearchResult
11
- import re
12
- import os
13
- from dotenv import load_dotenv
14
- import base64
15
- import whisper
16
- import pandas as pd
17
- import requests
18
- import json
19
-
20
# Load Whisper's "tiny" checkpoint once at import time; transcribe_audio
# reuses this single instance for every call.
model_whisper = whisper.load_model("tiny")
# NOTE(review): presumably populates the environment from a .env file so the
# os.getenv lookups in handle_web_search can find their keys -- confirm.
load_dotenv()
22
-
23
-
24
def read_question(state: QuestionState):
    """Print the incoming question and leave the state untouched.

    Returns:
        An empty dict, i.e. no state update.
    """
    print("Here is the current question:\n{}".format(state["question"]))

    # Logging only: there is nothing to write back.
    return {}
31
-
32
-
33
def get_file(state: QuestionState):
    """Download the file attached to the current task and save it locally.

    Reads ``task_id`` and ``file_name`` from ``state``, requests
    ``{DEFAULT_API_URL}/files/{task_id}`` and, on HTTP 200, writes the raw
    response body to ``file_name``. A missing file name, a request error and
    a non-200 status are all reported on stdout rather than raised.

    Returns:
        An empty dict; this step never updates the state.
    """
    # Constants
    DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
    REQUEST_TIMEOUT_SECONDS = 30

    task_id = state["task_id"]
    file_name = state["file_name"]

    # file_name may be empty (handle_reasoning guards against that too);
    # open("") would raise, so skip the request entirely.
    if not file_name:
        print("No file name provided. Skipping download.")
        return {}

    # Construct the endpoint URL
    endpoint = f"{DEFAULT_API_URL}/files/{task_id}"

    # Make the GET request to download the file. The timeout keeps a stalled
    # connection from blocking the caller forever.
    try:
        response = requests.get(endpoint, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as error:
        # Same best-effort policy as the non-200 branch below.
        print(f"Failed to download file. Error: {error}")
        return {}

    # Check if the request was successful
    if response.status_code == 200:
        # Write the content to a file
        with open(file_name, 'wb') as file:
            file.write(response.content)
        print(f"File downloaded successfully and saved as '{file_name}'")
    else:
        print(f"Failed to download file. Status code: {response.status_code}")

    return {}
56
-
57
-
58
def classify_question(state: QuestionState):
    """Classifies the question and determines its type or category.

    Builds a classification prompt around ``state["question"]``, sends it to
    the shared ``model`` and passes the reply to ``parse_response``, whose
    result is unpacked as a ``(classification, context)`` pair.

    Returns:
        A state update with ``question_category``, ``context`` and
        ``messages`` (the existing messages, if any, followed by this
        prompt/reply exchange).
    """
    question = state["question"]

    # Prepare our prompt for the LLM
    prompt = (
        "\n"
        "As a knowledgeable assistant, analyze the following question and classify it into a specific category.\n"
        "Question:\n"
        f"{question}\n"
        "These are the pairs of classification and context you can use, in the form of <classification>+<context>:\n"
        "- web_search+<query_to_use_for_web_search> (use direct language in query, as you would in a google search)\n"
        "- url_crawl\n"
        "- text_reasoning+<the_question_or_statement_for_reasoning>\n"
        "- excel_file\n"
        "- code+<code_snippet_to_execute>\n"
        "- audio\n"
        "- youtube+<youtube_link_to_get_transcription>\n"
        "- other+<any_other_relevant_context>\n"
        "It is MANDATORY to provide your classification and context in the following format:\n"
        "classification: <category>\n"
        "context: <context>\n"
    )

    # Call the LLM
    messages = [HumanMessage(content=prompt)]
    response = model.invoke(messages)

    # Parse the response into the category and its accompanying context
    classification, context = parse_response(response)

    # Update messages for tracking
    new_messages = state.get("messages", []) + [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": response.content},
    ]

    # Return state updates
    return {
        "question_category": classification,
        "context": context,
        "messages": new_messages,
    }
110
-
111
-
112
def handle_other(state: QuestionState):
    """Send the bare question to the LLM and return its reply.

    Returns:
        A state update whose ``expected_answer`` is the model's reply content.
    """
    # The prompt is simply the question wrapped in newlines.
    wrapped_question = "\n{}\n".format(state["question"])

    llm_reply = model.invoke([HumanMessage(content=wrapped_question)])

    return {"expected_answer": llm_reply.content}
127
-
128
-
129
def handle_reasoning(state: QuestionState):
    """Answer the question with the LLM, using the gathered context.

    The single human message sent to ``model`` contains, in order: the
    answer-format instructions, the question followed by ``state["context"]``
    and, when ``state["file_name"]`` names a PNG file, that image as base64
    data.

    Returns:
        A state update whose ``expected_answer`` is the raw model reply; the
        reply is expected to end with a ``FINAL ANSWER: ...`` line.
    """
    question = state["question"]
    context = state["context"]
    file_name = state["file_name"]

    system_prompt = (
        "\n"
        "You are a helpful assistant tasked with answering questions using a set of tools.\n"
        "Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:\n"
        "FINAL ANSWER: [YOUR FINAL ANSWER].\n"
        "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. "
        "If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. "
        "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
        "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\n"
        'Your answer should only start with "FINAL ANSWER: ", then follows with the answer.\n'
    )

    prompt = f"\n{question}\n{context}\n"

    message_content = [
        {
            "type": "text",
            "text": system_prompt,
        },
        {
            "type": "text",
            "text": prompt,
        },
    ]

    # Attach the image only for real PNG files: match on the extension
    # (case-insensitively) rather than on "png" appearing anywhere in the name.
    if file_name and file_name.lower().endswith(".png"):
        with open(file_name, "rb") as image_file:
            # Encode the image data to base64
            image_data = base64.b64encode(image_file.read()).decode("utf-8")

        message_content.append(
            {
                "type": "image",
                "source_type": "base64",
                "data": image_data,
                # Must describe the actual payload, which is a PNG.
                "mime_type": "image/png",
            }
        )

    # Call the LLM
    messages = [HumanMessage(content=message_content)]
    response = model.invoke(messages)

    return {
        "expected_answer": response.content
    }
179
-
180
-
181
def excel_to_markdown(state: QuestionState):
    """Render the spreadsheet named by ``state["file_name"]`` as Markdown.

    Returns:
        A state update whose ``context`` is the table text, written without
        the DataFrame index column.
    """
    # Load the workbook into a DataFrame, then serialise it in one step.
    sheet = pd.read_excel(state["file_name"])

    return {"context": sheet.to_markdown(index=False)}
192
-
193
-
194
def create_json_for_math(state: QuestionState):
    """Ask the LLM which numbers to add up, then sum them locally.

    The model receives the table in ``state["context"]`` and the question,
    and is asked to reply with ``json_numbers: [..]``. The JSON array is
    parsed and summed in Python, so the arithmetic is not left to the model.

    Returns:
        A state update whose ``context`` is the sum rendered with ``str``.

    Raises:
        json.JSONDecodeError: if no parseable JSON can be found in the reply.
    """
    question = state["question"]
    context = state["context"]

    prompt = (
        "\n"
        "considering this table:\n"
        f"{context}\n"
        "and this question:\n"
        f"{question}\n"
        "create a json file in this format: [1, 2, 3, 4, 5]\n"
        "Where each number is going to be added together later\n"
        "in the json, put all the numbers that need to be added together, following the request of the question.\n"
        "finally, only give this as a response:\n"
        "json_numbers: <json_object_you_created>\n"
    )
    message_content = [
        {
            "type": "text",
            "text": prompt,
        }
    ]

    # Call the LLM
    messages = [HumanMessage(content=message_content)]
    response = model.invoke(messages)

    reply_text = response.content
    # Models often wrap the answer in code fences or extra words; take the
    # bracketed array when there is one, otherwise fall back to stripping the
    # expected "json_numbers: " prefix.
    array_match = re.search(r"\[.*\]", reply_text, re.DOTALL)
    if array_match:
        json_to_add = array_match.group(0)
    else:
        json_to_add = reply_text.replace("json_numbers: ", "").strip()

    parsed_data = json.loads(json_to_add)

    return {
        "context": str(sum(parsed_data))
    }
227
-
228
-
229
def transcribe_audio(state: QuestionState):
    """Transcribe the audio file named by ``state["file_name"]``.

    Uses the module-level Whisper model.

    Returns:
        A state update whose ``context`` is the ``'text'`` entry of the
        transcription result.
    """
    whisper_result = model_whisper.transcribe(state["file_name"])

    return {"context": whisper_result['text']}
237
-
238
-
239
def get_youtube_transcript(state: QuestionState):
    """Tool to get the transcript of a YouTube video.

    Looks for a ``https://www.youtube.com/watch?v=<id>`` link inside
    ``state["question"]`` and fetches that video's transcript.

    Returns:
        A state update whose ``context`` holds one transcript snippet per
        line, or an empty string when the question contains no such link.
    """
    question = state["question"]

    # Use a regular expression to find the video ID
    match = re.search(r'https://www\.youtube\.com/watch\?v=([a-zA-Z0-9_-]+)', question)

    if match is None:
        # Without an ID there is nothing to fetch; return an empty context
        # instead of failing later on an unbound video_id.
        print("No video ID found.")
        return {"context": ""}

    video_id = match.group(1)
    print(video_id)

    ytt_api = YouTubeTranscriptApi()
    fetched_transcript = ytt_api.fetch(video_id)

    # One snippet per line, each terminated by a newline.
    full_transcript = "".join(snippet.text + '\n' for snippet in fetched_transcript)

    return {"context": full_transcript}
261
-
262
-
263
def get_final_answer(state: QuestionState):
    """Reduce the model reply in ``state["expected_answer"]`` to the answer.

    Returns the text that follows the first ``FINAL ANSWER:`` marker, up to
    the next marker if the reply repeats it, with surrounding whitespace
    removed. A reply without the marker is returned stripped but otherwise
    unchanged, rather than raising ``IndexError``.

    Returns:
        A state update with the cleaned ``expected_answer``.
    """
    expected_answer = state["expected_answer"]

    # Prefer the exact marker requested in the prompt, then tolerate a
    # missing space after the colon.
    for marker in ("FINAL ANSWER: ", "FINAL ANSWER:"):
        _, found, tail = expected_answer.partition(marker)
        if found:
            # Equivalent to expected_answer.split(marker)[1].
            return {"expected_answer": tail.split(marker)[0].strip()}

    return {"expected_answer": expected_answer.strip()}
269
-
270
-
271
def handle_web_search(state: QuestionState):
    """The assistant runs a web search tool to search for information on the Web.

    Sends ``state["context"]`` (with double quotes removed) to the Google
    Custom Search JSON API, using the ``CUSTOM_SEARCH_API`` key and the
    ``SEARCH_ID`` engine id from the environment, and picks the first result
    link that does not contain ``.pdf``.

    Returns:
        A state update whose ``context`` is the chosen link, or an empty
        string when the response offers no usable link.
    """
    base_url = "https://www.googleapis.com/customsearch/v1"

    search_input = state["context"].replace("\"", "")
    print(f"Search query: {search_input}")
    params = {
        'key': os.getenv("CUSTOM_SEARCH_API"),
        'cx': os.getenv("SEARCH_ID"),
        'q': search_input,
        'num': 5,
        'start': 1,
    }
    # The timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(base_url, params=params, timeout=30)
    response_json = response.json()
    print('---RESPONSE JSON---')
    print(response_json)

    # Extract links from the items. The response carries no 'items' key when
    # the search finds nothing or the API reports an error, so default to an
    # empty list instead of raising KeyError.
    links = [item['link'] for item in response_json.get('items', [])]

    # i don't want pdf files, they're too long
    chosen_link = next((link for link in links if ".pdf" not in link), "")
    print('---LINKS---')
    print(links)

    return {
        "context": chosen_link
    }
303
-
304
-
305
def enter_url(state: QuestionState):
    """Fetch the page at ``state["context"]`` and return its text as context.

    The HTML is parsed with BeautifulSoup, reduced to its text via
    ``get_text()`` and passed through a ``mistune.Markdown`` instance; the
    result is printed and stored as the new context.

    Returns:
        A state update whose ``context`` is the converted page text, or an
        empty string when there is no URL to fetch.
    """
    url = state["context"]

    # handle_web_search yields "" when it finds no usable link; requests would
    # reject that, so pass the empty context through instead of raising.
    if not url:
        print("No URL to fetch.")
        return {"context": ""}

    # Step 1: Download the page. The timeout keeps a stalled connection from
    # blocking the caller forever.
    response = requests.get(url, timeout=30)
    html_content = response.text

    # Step 2: Parse the HTML content
    soup = BeautifulSoup(html_content, 'html.parser')

    # Step 3: Run the tag-free text through mistune.
    # NOTE(review): mistune parses Markdown rather than producing it, so the
    # output format depends on the installed mistune version -- confirm.
    markdown_converter = mistune.Markdown()
    markdown_content = markdown_converter(soup.get_text())

    # Print the converted content
    print(markdown_content)

    return {
        "context": markdown_content
    }