Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,8 +9,7 @@ from langchain_core.messages import HumanMessage
|
|
| 9 |
from langchain_openai import ChatOpenAI
|
| 10 |
from langchain_community.tools import DuckDuckGoSearchResults
|
| 11 |
from langchain_google_community import GoogleSearchAPIWrapper
|
| 12 |
-
from langchain_community.document_loaders import YoutubeLoader
|
| 13 |
-
from langchain_community.document_loaders import PyPDFLoader
|
| 14 |
import wikipedia
|
| 15 |
import speech_recognition as sr
|
| 16 |
import tempfile
|
|
@@ -62,8 +61,42 @@ def pdf_loader_tool(file_url: str) -> str:
|
|
| 62 |
|
| 63 |
except Exception as e:
|
| 64 |
return f"Reading failed: {str(e)}"
|
| 65 |
-
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
def read_image_text(file_URL: str) -> str:
|
| 69 |
"""Extract text from image downloaded from given file_URL using OCR."""
|
|
@@ -145,6 +178,8 @@ tools = [
|
|
| 145 |
subtract,
|
| 146 |
read_image_text,
|
| 147 |
pdf_loader_tool,
|
|
|
|
|
|
|
| 148 |
youtube_transcript_tool,
|
| 149 |
transcribe_audio,
|
| 150 |
analyze_python_code,
|
|
@@ -175,7 +210,22 @@ from langchain_core.messages import HumanMessage, SystemMessage
|
|
| 175 |
def assistant(state: AgentState, llm_with_tools):
|
| 176 |
# System message
|
| 177 |
textual_description_of_tools = """
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
pdf_loader_tool(file_url: str) -> str:
|
| 180 |
Load and extract text from a PDF file downloaded from given file_url.
|
| 181 |
Args:
|
|
|
|
| 9 |
from langchain_openai import ChatOpenAI
|
| 10 |
from langchain_community.tools import DuckDuckGoSearchResults
|
| 11 |
from langchain_google_community import GoogleSearchAPIWrapper
|
| 12 |
+
from langchain_community.document_loaders import YoutubeLoader,PyPDFLoader,Docx2txtLoader,TextLoader
|
|
|
|
| 13 |
import wikipedia
|
| 14 |
import speech_recognition as sr
|
| 15 |
import tempfile
|
|
|
|
| 61 |
|
| 62 |
except Exception as e:
|
| 63 |
return f"Reading failed: {str(e)}"
|
|
|
|
| 64 |
|
| 65 |
+
def docx_loader_tool(file_url: str) -> str:
    """Load and extract text from a docx file downloaded from given file_url.

    Args:
        file_url: URL of the .docx file to download.

    Returns:
        The ``page_content`` of every loaded document joined with newlines.
        On failure a descriptive string is returned instead of raising:
        ``"Failed to download file: <status>"`` for a non-200 response, or
        ``"Reading failed: <error>"`` for any other exception.
    """
    import os  # local import: only needed to build the temporary file path

    try:
        # Download first so nothing is created on disk for a failed request;
        # the timeout keeps a stalled server from blocking the call forever.
        response = requests.get(file_url, timeout=30)
        if response.status_code != 200:
            return f"Failed to download file: {response.status_code}"

        # Write to a *closed* file inside a temporary directory so the loader
        # can reopen it by path on every platform (a still-open
        # NamedTemporaryFile cannot be reopened by name on Windows).
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, "download.docx")
            with open(temp_path, "wb") as temp_file:
                temp_file.write(response.content)

            docs = Docx2txtLoader(temp_path).load()
            return "\n".join(doc.page_content for doc in docs)

    except Exception as e:
        # Errors are reported as text rather than raised, as before.
        return f"Reading failed: {str(e)}"
|
| 82 |
+
|
| 83 |
+
def txt_loader_tool(file_url: str) -> str:
    """Load and extract text from a txt file downloaded from given file_url.

    Args:
        file_url: URL of the .txt file to download.

    Returns:
        The ``page_content`` of every loaded document joined with newlines.
        On failure a descriptive string is returned instead of raising:
        ``"Failed to download file: <status>"`` for a non-200 response, or
        ``"Reading failed: <error>"`` for any other exception.
    """
    import os  # local import: only needed to build the temporary file path

    try:
        # Download first so nothing is created on disk for a failed request;
        # the timeout keeps a stalled server from blocking the call forever.
        response = requests.get(file_url, timeout=30)
        if response.status_code != 200:
            return f"Failed to download file: {response.status_code}"

        # Write to a *closed* file inside a temporary directory so the loader
        # can reopen it by path on every platform (a still-open
        # NamedTemporaryFile cannot be reopened by name on Windows).
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, "download.txt")
            with open(temp_path, "wb") as temp_file:
                temp_file.write(response.content)

            # NOTE(review): TextLoader is left on its default encoding, as in
            # the original; confirm whether an explicit encoding is wanted.
            docs = TextLoader(temp_path).load()
            return "\n".join(doc.page_content for doc in docs)

    except Exception as e:
        # Errors are reported as text rather than raised, as before.
        return f"Reading failed: {str(e)}"
|
| 100 |
|
| 101 |
def read_image_text(file_URL: str) -> str:
|
| 102 |
"""Extract text from image downloaded from given file_URL using OCR."""
|
|
|
|
| 178 |
subtract,
|
| 179 |
read_image_text,
|
| 180 |
pdf_loader_tool,
|
| 181 |
+
docx_loader_tool,
|
| 182 |
+
txt_loader_tool,
|
| 183 |
youtube_transcript_tool,
|
| 184 |
transcribe_audio,
|
| 185 |
analyze_python_code,
|
|
|
|
| 210 |
def assistant(state: AgentState, llm_with_tools):
|
| 211 |
# System message
|
| 212 |
textual_description_of_tools = """
|
| 213 |
+
docx_loader_tool(file_url: str) -> str:
|
| 214 |
+
Load and extract text from a docx file downloaded from given file_url.
|
| 215 |
+
|
| 216 |
+
Args:
|
| 217 |
+
file_url, a string indicating the url of the given file.
|
| 218 |
+
Returns:
|
| 219 |
+
The text extracted from the given docx document.
|
| 220 |
+
|
| 221 |
+
def txt_loader_tool(file_url: str) -> str:
|
| 222 |
+
Load and extract text from a txt file downloaded from given file_url.
|
| 223 |
+
|
| 224 |
+
Args:
|
| 225 |
+
file_url, a string indicating the url of the given file.
|
| 226 |
+
Returns:
|
| 227 |
+
The text extracted from the given txt.
|
| 228 |
+
|
| 229 |
pdf_loader_tool(file_url: str) -> str:
|
| 230 |
Load and extract text from a PDF file downloaded from given file_url.
|
| 231 |
Args:
|