Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,11 +10,13 @@ from langchain_openai import ChatOpenAI
|
|
| 10 |
from langchain_community.tools import DuckDuckGoSearchResults
|
| 11 |
from langchain_google_community import GoogleSearchAPIWrapper
|
| 12 |
from langchain_community.document_loaders import YoutubeLoader
|
|
|
|
| 13 |
import wikipedia
|
| 14 |
import speech_recognition as sr
|
| 15 |
import tempfile
|
| 16 |
import ast
|
| 17 |
-
|
|
|
|
| 18 |
# Or using AudioTranscriptTool.
|
| 19 |
|
| 20 |
|
|
@@ -42,7 +44,44 @@ def add(a: int|float, b: int|float) -> float:
|
|
| 42 |
def subtract(a: int|float, b: int|float) -> float:
|
| 43 |
"""Subtract a with b."""
|
| 44 |
return a - b
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def youtube_transcript_tool(url: str) -> str:
|
| 47 |
"""Load transcript from a YouTube video URL."""
|
| 48 |
loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
|
|
@@ -104,6 +143,8 @@ tools = [
|
|
| 104 |
multiply,
|
| 105 |
add,
|
| 106 |
subtract,
|
|
|
|
|
|
|
| 107 |
youtube_transcript_tool,
|
| 108 |
transcribe_audio,
|
| 109 |
analyze_python_code,
|
|
@@ -133,8 +174,22 @@ from langchain_core.messages import HumanMessage, SystemMessage
|
|
| 133 |
|
| 134 |
def assistant(state: AgentState, llm_with_tools):
|
| 135 |
# System message
|
| 136 |
-
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
transcribe_audio(file_url: str) -> str:
|
| 139 |
Downloads an audio file from a URL into a temporary file and transcribes it using SpeechRecognition.
|
| 140 |
Args:
|
|
@@ -196,7 +251,7 @@ Substract(a: int|float, b: int|float) -> float:
|
|
| 196 |
previous_message = state["messages"]
|
| 197 |
sys_msg = SystemMessage(content=f"""You are an agent that must use tools for computations or unknown info. Think step-by-step: 1. Analyze question. 2. Call tools if needed. 3. Summarize.
|
| 198 |
Please call different search tools or wikipedia tool multiple times if needed for verification or depth, aiming for at least third times on complex queries.
|
| 199 |
-
And here are the tools you can use :\n{
|
| 200 |
I will ask you a question. Please return your answer with the following template: [YOUR FINAL ANSWER] without brackets.
|
| 201 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
|
| 202 |
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
|
|
|
|
| 10 |
from langchain_community.tools import DuckDuckGoSearchResults
|
| 11 |
from langchain_google_community import GoogleSearchAPIWrapper
|
| 12 |
from langchain_community.document_loaders import YoutubeLoader
|
| 13 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 14 |
import wikipedia
|
| 15 |
import speech_recognition as sr
|
| 16 |
import tempfile
|
| 17 |
import ast
|
| 18 |
+
import pytesseract
|
| 19 |
+
from PIL import Image
|
| 20 |
# Or using AudioTranscriptTool.
|
| 21 |
|
| 22 |
|
|
|
|
| 44 |
def subtract(a: int|float, b: int|float) -> float:
|
| 45 |
"""Subtract a with b."""
|
| 46 |
return a - b
|
| 47 |
+
|
| 48 |
+
def pdf_loader_tool(file_url: str) -> str:
    """Load and extract text from a PDF file downloaded from given file_url.

    Args:
        file_url: URL of the PDF file to download.

    Returns:
        The page contents of the PDF joined with newlines. On failure a
        short message is returned instead of raising:
        "Failed to download file: <status>" for a non-200 response, or
        "Reading failed: <error>" for any other error.
    """
    try:
        # Bound the request so a stalled server cannot block the agent
        # indefinitely (requests waits forever when no timeout is given).
        response = requests.get(file_url, timeout=30)
        if response.status_code != 200:
            return f"Failed to download file: {response.status_code}"

        # The loader is handed a file name, so the bytes are spilled to a
        # temporary file that is removed when the `with` block exits.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as temp_file:
            temp_file.write(response.content)
            temp_file.flush()  # Make sure data is on disk before reopening by name
            loader = PyPDFLoader(temp_file.name)
            docs = loader.load()
            return "\n".join(doc.page_content for doc in docs)

    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Reading failed: {str(e)}"
|
| 65 |
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def read_image_text(file_URL: str) -> str:
    """Extract text from image downloaded from given file_URL using OCR.

    Args:
        file_URL: URL of the image file to download.

    Returns:
        The text produced by pytesseract for the image. On failure a short
        message is returned instead of raising:
        "Failed to download file: <status>" for a non-200 response, or
        "Reading failed: <error>" for any other error.
    """
    try:
        # The parameter is spelled `file_URL`; the previous body read the
        # undefined name `file_url`, so every call ended in a NameError.
        # The timeout keeps a stalled server from blocking the agent.
        response = requests.get(file_URL, timeout=30)
        if response.status_code != 200:
            return f"Failed to download file: {response.status_code}"

        # Spill the bytes to a temporary file that is removed when the
        # `with` block exits.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=True) as temp_file:
            temp_file.write(response.content)
            temp_file.flush()  # Make sure data is on disk before reopening by name
            # Close the image handle deterministically once OCR is done.
            with Image.open(temp_file.name) as image:
                return pytesseract.image_to_string(image)

    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Reading failed: {str(e)}"
|
| 84 |
+
|
| 85 |
def youtube_transcript_tool(url: str) -> str:
|
| 86 |
"""Load transcript from a YouTube video URL."""
|
| 87 |
loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
|
|
|
|
| 143 |
multiply,
|
| 144 |
add,
|
| 145 |
subtract,
|
| 146 |
+
read_image_text,
|
| 147 |
+
pdf_loader_tool,
|
| 148 |
youtube_transcript_tool,
|
| 149 |
transcribe_audio,
|
| 150 |
analyze_python_code,
|
|
|
|
| 174 |
|
| 175 |
def assistant(state: AgentState, llm_with_tools):
|
| 176 |
# System message
|
| 177 |
+
textual_description_of_tools = """
|
| 178 |
|
| 179 |
+
pdf_loader_tool(file_url: str) -> str:
|
| 180 |
+
Load and extract text from a PDF file downloaded from given file_url.
|
| 181 |
+
Args:
|
| 182 |
+
file_url, a string indicating the url of the given file.
|
| 183 |
+
Returns:
|
| 184 |
+
The text extracted from the given pdf.
|
| 185 |
+
|
| 186 |
+
read_image_text(file_URL: str) -> str:
|
| 187 |
+
Extract text from image downloaded from given file_URL using OCR.
|
| 188 |
+
Args:
|
| 189 |
+
file_URL, a string indicating the url of the given file.
|
| 190 |
+
Returns:
|
| 191 |
+
The text extracted from the given image.
|
| 192 |
+
|
| 193 |
transcribe_audio(file_url: str) -> str:
|
| 194 |
Downloads an audio file from a URL into a temporary file and transcribes it using SpeechRecognition.
|
| 195 |
Args:
|
|
|
|
| 251 |
previous_message = state["messages"]
|
| 252 |
sys_msg = SystemMessage(content=f"""You are an agent that must use tools for computations or unknown info. Think step-by-step: 1. Analyze question. 2. Call tools if needed. 3. Summarize.
|
| 253 |
Please call different search tools or wikipedia tool multiple times if needed for verification or depth, aiming for at least third times on complex queries.
|
| 254 |
+
And here are the tools you can use :\n{textual_description_of_tools} \n
|
| 255 |
I will ask you a question. Please return your answer with the following template: [YOUR FINAL ANSWER] without brackets.
|
| 256 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
|
| 257 |
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
|