RuaZhou committed on
Commit
64f4cb9
·
verified ·
1 Parent(s): 27fee7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -4
app.py CHANGED
@@ -9,8 +9,7 @@ from langchain_core.messages import HumanMessage
9
  from langchain_openai import ChatOpenAI
10
  from langchain_community.tools import DuckDuckGoSearchResults
11
  from langchain_google_community import GoogleSearchAPIWrapper
12
- from langchain_community.document_loaders import YoutubeLoader
13
- from langchain_community.document_loaders import PyPDFLoader
14
  import wikipedia
15
  import speech_recognition as sr
16
  import tempfile
@@ -62,8 +61,42 @@ def pdf_loader_tool(file_url: str) -> str:
62
 
63
  except Exception as e:
64
  return f"Reading failed: {str(e)}"
65
-
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def read_image_text(file_URL: str) -> str:
69
  """Extract text from image downloaded from given file_URL using OCR."""
@@ -145,6 +178,8 @@ tools = [
145
  subtract,
146
  read_image_text,
147
  pdf_loader_tool,
 
 
148
  youtube_transcript_tool,
149
  transcribe_audio,
150
  analyze_python_code,
@@ -175,7 +210,22 @@ from langchain_core.messages import HumanMessage, SystemMessage
175
  def assistant(state: AgentState, llm_with_tools):
176
  # System message
177
  textual_description_of_tools = """
178
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  pdf_loader_tool(file_url: str) -> str:
180
  Load and extract text from a PDF file downloaded from given file_url.
181
  Args:
 
9
  from langchain_openai import ChatOpenAI
10
  from langchain_community.tools import DuckDuckGoSearchResults
11
  from langchain_google_community import GoogleSearchAPIWrapper
12
+ from langchain_community.document_loaders import YoutubeLoader,PyPDFLoader,Docx2txtLoader,TextLoader
 
13
  import wikipedia
14
  import speech_recognition as sr
15
  import tempfile
 
61
 
62
  except Exception as e:
63
  return f"Reading failed: {str(e)}"
 
64
 
65
def docx_loader_tool(file_url: str) -> str:
    """Load and extract text from a docx file downloaded from given file_url.

    Args:
        file_url: URL of the .docx file to download.

    Returns:
        The extracted text, with the loader's documents joined by newlines.
        On any failure a human-readable error string is returned instead of
        raising, so the caller always receives a string result.
    """
    try:
        # Download before creating the temp file so a failed request leaves
        # nothing on disk; the timeout keeps an unresponsive host from
        # blocking the caller indefinitely.
        response = requests.get(file_url, timeout=30)
        if response.status_code != 200:
            return f"Failed to download file: {response.status_code}"

        with tempfile.NamedTemporaryFile(suffix=".docx", delete=True) as temp_file:
            temp_file.write(response.content)
            temp_file.flush()  # The loader reopens the file by name.
            docs = Docx2txtLoader(temp_file.name).load()
        return "\n".join(doc.page_content for doc in docs)
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Reading failed: {str(e)}"
82
+
83
def txt_loader_tool(file_url: str) -> str:
    """Load and extract text from a txt file downloaded from given file_url.

    Args:
        file_url: URL of the text file to download.

    Returns:
        The file's text, with the loader's documents joined by newlines.
        On any failure a human-readable error string is returned instead of
        raising, so the caller always receives a string result.
    """
    try:
        # Download before creating the temp file so a failed request leaves
        # nothing on disk; the timeout keeps an unresponsive host from
        # blocking the caller indefinitely.
        response = requests.get(file_url, timeout=30)
        if response.status_code != 200:
            return f"Failed to download file: {response.status_code}"

        with tempfile.NamedTemporaryFile(suffix=".txt", delete=True) as temp_file:
            temp_file.write(response.content)
            temp_file.flush()  # The loader reopens the file by name.
            # Decode as UTF-8 explicitly so the result does not depend on the
            # host's locale encoding.
            docs = TextLoader(temp_file.name, encoding="utf-8").load()
        return "\n".join(doc.page_content for doc in docs)
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Reading failed: {str(e)}"
100
 
101
  def read_image_text(file_URL: str) -> str:
102
  """Extract text from image downloaded from given file_URL using OCR."""
 
178
  subtract,
179
  read_image_text,
180
  pdf_loader_tool,
181
+ docx_loader_tool,
182
+ txt_loader_tool,
183
  youtube_transcript_tool,
184
  transcribe_audio,
185
  analyze_python_code,
 
210
  def assistant(state: AgentState, llm_with_tools):
211
  # System message
212
  textual_description_of_tools = """
213
+ docx_loader_tool(file_url: str) -> str:
214
+ Load and extract text from a docx file downloaded from given file_url.
215
+
216
+ Args:
217
+ file_url, a string indicating the url of the given file.
218
+ Returns:
219
+ The text extracted from the given docx document.
220
+
221
+ def txt_loader_tool(file_url: str) -> str:
222
+ Load and extract text from a txt file downloaded from given file_url.
223
+
224
+ Args:
225
+ file_url, a string indicating the url of the given file.
226
+ Returns:
227
+ The text extracted from the given txt.
228
+
229
  pdf_loader_tool(file_url: str) -> str:
230
  Load and extract text from a PDF file downloaded from given file_url.
231
  Args: