Lasdw committed on
Commit
36dcbf4
·
1 Parent(s): d3fb3e0

added pdf viewing tool

Browse files
Files changed (4) hide show
  1. agent.py +63 -2
  2. app.py +20 -3
  3. requirements.txt +2 -0
  4. tools.py +113 -0
agent.py CHANGED
@@ -32,7 +32,8 @@ from tools import (
32
  transcribe_audio,
33
  extract_python_code_from_complex_input,
34
  process_image,
35
- read_file
 
36
  )
37
 
38
  load_dotenv()
@@ -544,7 +545,10 @@ def extract_json_from_text(text: str) -> dict:
544
  "excel": "excel_to_text",
545
  "youtube": "process_youtube_video",
546
  "webpage": "webpage_scrape",
547
- "scrape": "webpage_scrape"
 
 
 
548
  }
549
 
550
  if result["action"].lower() in tool_mapping:
@@ -1390,6 +1394,58 @@ def read_file_node(state: AgentState) -> Dict[str, Any]:
1390
  "action_input": None # Clear the action input
1391
  }
1392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1393
  # Router function to direct to the correct tool
1394
  def router(state: AgentState) -> str:
1395
  """Route to the appropriate tool based on the current_tool field."""
@@ -1420,6 +1476,8 @@ def router(state: AgentState) -> str:
1420
  return "process_image"
1421
  elif tool == "read_file":
1422
  return "read_file"
 
 
1423
  else:
1424
  return "end"
1425
 
@@ -1441,6 +1499,7 @@ def create_agent_graph() -> StateGraph:
1441
  builder.add_node("transcribe_audio", transcribe_audio_node)
1442
  builder.add_node("process_image", process_image_node)
1443
  builder.add_node("read_file", read_file_node)
 
1444
 
1445
  # Define edges: these determine how the control flow moves
1446
  builder.add_edge(START, "assistant")
@@ -1476,6 +1535,7 @@ def create_agent_graph() -> StateGraph:
1476
  "transcribe_audio": "transcribe_audio",
1477
  "process_image": "process_image",
1478
  "read_file": "read_file",
 
1479
  "end": END
1480
  }
1481
  )
@@ -1492,6 +1552,7 @@ def create_agent_graph() -> StateGraph:
1492
  builder.add_edge("transcribe_audio", "assistant")
1493
  builder.add_edge("process_image", "assistant")
1494
  builder.add_edge("read_file", "assistant")
 
1495
 
1496
  # Compile the graph
1497
  return builder.compile()
 
32
  transcribe_audio,
33
  extract_python_code_from_complex_input,
34
  process_image,
35
+ read_file,
36
+ process_online_document
37
  )
38
 
39
  load_dotenv()
 
545
  "excel": "excel_to_text",
546
  "youtube": "process_youtube_video",
547
  "webpage": "webpage_scrape",
548
+ "scrape": "webpage_scrape",
549
+ "pdf": "process_online_document",
550
+ "document": "process_online_document",
551
+ "online": "process_online_document"
552
  }
553
 
554
  if result["action"].lower() in tool_mapping:
 
1394
  "action_input": None # Clear the action input
1395
  }
1396
 
1397
def process_online_document_node(state: AgentState) -> Dict[str, Any]:
    """Run the online-document tool for the current state and report the result.

    Reads ``state["action_input"]``, which may be a dict carrying ``url`` and
    an optional ``doc_type``, or a bare URL string. The URL is validated,
    ``process_online_document`` is called, and the outcome is appended to the
    conversation as an ``Observation: ...`` message.

    Args:
        state: Current agent state; ``action_input`` and ``messages`` are read.

    Returns:
        A state update with the observation appended to ``messages`` and with
        ``current_tool`` and ``action_input`` reset to ``None``.
    """
    print("Online Document Processing Tool Called...\n\n")

    # Extract tool arguments
    action_input = state.get("action_input", {})
    print(f"Online document processing action_input: {action_input}")

    # Extract URL and document type
    url = ""
    doc_type = "auto"  # Default to auto-detection

    if isinstance(action_input, dict):
        url = action_input.get("url", "")
        doc_type = action_input.get("doc_type", "auto")
    elif isinstance(action_input, str):
        url = action_input

    # The arguments are model-generated, so they can be None, non-string or
    # padded with whitespace. Normalise them here so the validation below can
    # never raise (e.g. AttributeError from calling .startswith on a number).
    url = str(url).strip() if url else ""
    doc_type = str(doc_type).strip().lower() if doc_type else "auto"

    print(f"Processing online document: '{url}' (type: {doc_type})")

    # Safety check - don't run with empty URL
    if not url:
        result = "Error: No URL provided. Please provide a valid URL to process."
    elif not url.startswith(("http://", "https://")):
        result = f"Error: Invalid URL format: {url}. Please provide a valid URL starting with http:// or https://."
    else:
        # Call the online document processing function
        try:
            result = process_online_document(url, doc_type)
        except Exception as e:
            result = f"Error processing online document: {str(e)}\n\nThis could be due to:\n- The document is not accessible\n- Network connectivity issues\n- Unsupported document type\n- Rate limiting"

    # NOTE(review): the image branch of the tool delegates to process_image,
    # whose return type is not visible here; coerce so len()/strip() are safe.
    if not isinstance(result, str):
        result = str(result)

    print(f"Online document processing result length: {len(result)}")

    # Format the observation to continue the ReAct cycle
    tool_message = AIMessage(
        content=f"Observation: {result.strip()}"
    )

    # Print the observation that will be sent back to the assistant
    print("\n=== TOOL OBSERVATION ===")
    content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
    print(content_preview)
    print("=== END OBSERVATION ===\n")

    # Return the updated state
    return {
        "messages": state["messages"] + [tool_message],
        "current_tool": None,  # Reset the current tool
        "action_input": None  # Clear the action input
    }
1448
+
1449
  # Router function to direct to the correct tool
1450
  def router(state: AgentState) -> str:
1451
  """Route to the appropriate tool based on the current_tool field."""
 
1476
  return "process_image"
1477
  elif tool == "read_file":
1478
  return "read_file"
1479
+ elif tool == "process_online_document":
1480
+ return "process_online_document"
1481
  else:
1482
  return "end"
1483
 
 
1499
  builder.add_node("transcribe_audio", transcribe_audio_node)
1500
  builder.add_node("process_image", process_image_node)
1501
  builder.add_node("read_file", read_file_node)
1502
+ builder.add_node("process_online_document", process_online_document_node)
1503
 
1504
  # Define edges: these determine how the control flow moves
1505
  builder.add_edge(START, "assistant")
 
1535
  "transcribe_audio": "transcribe_audio",
1536
  "process_image": "process_image",
1537
  "read_file": "read_file",
1538
+ "process_online_document": "process_online_document",
1539
  "end": END
1540
  }
1541
  )
 
1552
  builder.add_edge("transcribe_audio", "assistant")
1553
  builder.add_edge("process_image", "assistant")
1554
  builder.add_edge("read_file", "assistant")
1555
+ builder.add_edge("process_online_document", "assistant")
1556
 
1557
  # Compile the graph
1558
  return builder.compile()
app.py CHANGED
@@ -257,7 +257,7 @@ with gr.Blocks(title="TurboNerd Agent🤓") as demo:
257
  ## Chat with TurboNerd 🤓
258
  Ask any question and get an answer from TurboNerd. The agent can search the web, Wikipedia, analyze images, process audio, and more!
259
 
260
- ### Complex Example Questions:
261
 
262
  **Research & Analysis:**
263
  - "Find the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists. Cross-reference this information with their Wikipedia page and any recent news articles."
@@ -278,8 +278,25 @@ with gr.Blocks(title="TurboNerd Agent🤓") as demo:
278
  with gr.Column(scale=4):
279
  chatbot = gr.Chatbot(
280
  label="Conversation",
281
- height=300,
282
- type="messages"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  )
284
  with gr.Row():
285
  question_input = gr.Textbox(
 
257
  ## Chat with TurboNerd 🤓
258
  Ask any question and get an answer from TurboNerd. The agent can search the web, Wikipedia, analyze images, process audio, and more!
259
 
260
+ ### Example Questions:
261
 
262
  **Research & Analysis:**
263
  - "Find the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists. Cross-reference this information with their Wikipedia page and any recent news articles."
 
278
  with gr.Column(scale=4):
279
  chatbot = gr.Chatbot(
280
  label="Conversation",
281
+ height=600,
282
+ type="messages",
283
+ elem_id="chatbot",
284
+ show_copy_button=True,
285
+ show_label=True,
286
+ container=True,
287
+ bubble_full_width=False,
288
+ rtl=False,
289
+ show_share_button=False,
290
+ show_retry_button=True,
291
+ show_clear_button=True,
292
+ avatar_images=(None, None),
293
+ likeable=False,
294
+ layout="panel",
295
+ min_width=600,
296
+ max_width=1200,
297
+ scale=1,
298
+ autoscroll=True,
299
+ elem_classes=["chatbot-container"]
300
  )
301
  with gr.Row():
302
  question_input = gr.Textbox(
requirements.txt CHANGED
@@ -22,3 +22,5 @@ openai
22
  openpyxl
23
  Pillow
24
  numpy
 
 
 
22
  openpyxl
23
  Pillow
24
  numpy
25
+ PyPDF2
26
+ pymupdf
tools.py CHANGED
@@ -1339,6 +1339,114 @@ def read_file(file_path: str, file_content: Optional[bytes] = None, line_start:
1339
  print(f"Warning: Could not delete temporary file {temp_path}: {e}")
1340
  # Non-fatal error, don't propagate exception
1341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1342
  # Define the tools configuration
1343
  tools_config = [
1344
  {
@@ -1390,5 +1498,10 @@ tools_config = [
1390
  "name": "read_file",
1391
  "description": "Read and display the contents of a text file (.py, .txt, etc.). You can provide a file path or use a file attachment. Optionally specify line range to read a specific portion of the file.",
1392
  "func": read_file
 
 
 
 
 
1393
  }
1394
  ]
 
1339
  print(f"Warning: Could not delete temporary file {temp_path}: {e}")
1340
  # Non-fatal error, don't propagate exception
1341
 
1342
def _is_internal_host(host: str) -> bool:
    """Return True when *host* is, or resolves to, a non-public address."""
    import ipaddress
    import socket

    def _non_public(address) -> bool:
        # Judge IPv4-mapped IPv6 addresses (::ffff:a.b.c.d) by their IPv4 part.
        mapped = getattr(address, "ipv4_mapped", None)
        if mapped is not None:
            address = mapped
        return (
            address.is_private
            or address.is_loopback
            or address.is_link_local
            or address.is_reserved
            or address.is_multicast
            or address.is_unspecified
        )

    name = host.strip().rstrip(".").lower()
    if not name:
        return False
    if name == "localhost" or name.endswith(".localhost"):
        return True

    try:
        return _non_public(ipaddress.ip_address(name))
    except ValueError:
        pass  # Not an IP literal; treat it as a DNS name below.

    # Best-effort resolution so names that point at internal addresses are
    # blocked too. If resolution fails we do not block: the download itself
    # will then report the connection error.
    try:
        resolved = socket.getaddrinfo(name, None)
    except (OSError, UnicodeError):
        return False

    for info in resolved:
        try:
            candidate = ipaddress.ip_address(str(info[4][0]).split("%")[0])
        except ValueError:
            continue
        if _non_public(candidate):
            return True
    return False


def process_online_document(url: str, doc_type: str = "auto") -> str:
    """
    Download an online PDF or image and return a text analysis of it.

    Args:
        url: http(s) URL of the document or image
        doc_type: Type of document ("pdf", "image", or "auto" for automatic
            detection from the Content-Type header / a ``.pdf`` URL suffix)

    Returns:
        For PDFs, a report with metadata, page count and up to 8000 characters
        of extracted text. For images, whatever ``process_image`` returns.
        Every failure is reported as a string starting with "Error" rather
        than raised.
    """
    # Upper bound on the download size so a huge or endless response cannot
    # exhaust memory or disk.
    max_download_bytes = 50 * 1024 * 1024

    try:
        # Validate URL
        parsed_url = urlparse(url)
        host = parsed_url.hostname or ""
        if parsed_url.scheme not in ("http", "https") or not parsed_url.netloc or not host:
            return f"Error: Invalid URL format: {url}. Please provide a valid URL with http:// or https:// prefix."

        # Block loopback / private / link-local targets (SSRF protection).
        # NOTE: redirects and DNS rebinding are not covered by this check.
        if _is_internal_host(host):
            return f"Error: Access to internal/local URLs is blocked for security: {url}"

        doc_type = (doc_type or "auto").strip().lower()

        print(f"Processing online document: {url}")

        # Set headers to mimic a browser
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/pdf,image/*,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
        }

        temp_path = None
        try:
            # stream=True defers the body download until we iterate over it,
            # so the type can be rejected from the headers alone.
            response = requests.get(url, headers=headers, stream=True, timeout=15)
            try:
                response.raise_for_status()

                # Determine content type
                content_type = response.headers.get('content-type', '').lower()

                if doc_type == "auto":
                    looks_like_pdf = (
                        "pdf" in content_type
                        or url.lower().endswith('.pdf')
                        or parsed_url.path.lower().endswith('.pdf')
                    )
                    if looks_like_pdf:
                        doc_type = "pdf"
                    elif any(img_type in content_type for img_type in ['jpeg', 'png', 'gif', 'bmp', 'webp']):
                        doc_type = "image"
                    else:
                        return f"Error: Unsupported content type: {content_type}"
                elif doc_type not in ("pdf", "image"):
                    return f"Error: Unsupported document type: {doc_type}"

                # Stream the body into a temporary file, enforcing the size cap.
                with tempfile.NamedTemporaryFile(delete=False) as temp_file:
                    temp_path = temp_file.name
                    downloaded = 0
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        downloaded += len(chunk)
                        if downloaded > max_download_bytes:
                            return (
                                "Error: Document exceeds the "
                                f"{max_download_bytes // (1024 * 1024)} MB download limit: {url}"
                            )
                        temp_file.write(chunk)
            finally:
                response.close()

            if doc_type == "image":
                # Use the existing process_image function
                return process_image(temp_path, url=url)

            # doc_type == "pdf"
            try:
                import PyPDF2
            except ImportError:
                return "Error: PyPDF2 library is required for PDF processing. Please install it using 'pip install PyPDF2'"

            with open(temp_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # extract_text() can yield nothing for image-only pages;
                # treat that as an empty page instead of failing.
                text_content = "".join(
                    (page.extract_text() or "") + "\n" for page in pdf_reader.pages
                )

                parts = ["PDF Analysis:\n\n"]

                # Get metadata
                metadata = pdf_reader.metadata
                if metadata:
                    parts.append("Metadata:\n")
                    parts.extend(
                        f"- {key}: {value}\n" for key, value in metadata.items() if value
                    )
                    parts.append("\n")

                parts.append(f"Number of pages: {len(pdf_reader.pages)}\n\n")
                parts.append("Content:\n")
                parts.append(text_content[:8000])  # Limit content length
                if len(text_content) > 8000:
                    parts.append("\n\n[Content truncated due to length...]")

            return "".join(parts)

        finally:
            # Clean up the temporary file
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError as e:
                    print(f"Warning: Could not delete temporary file {temp_path}: {e}")

    except requests.exceptions.RequestException as e:
        return f"Error accessing URL {url}: {str(e)}"
    except Exception as e:
        return f"Error processing online document: {str(e)}"
1449
+
1450
  # Define the tools configuration
1451
  tools_config = [
1452
  {
 
1498
  "name": "read_file",
1499
  "description": "Read and display the contents of a text file (.py, .txt, etc.). You can provide a file path or use a file attachment. Optionally specify line range to read a specific portion of the file.",
1500
  "func": read_file
1501
+ },
1502
+ {
1503
+ "name": "process_online_document",
1504
+ "description": "Process and analyze online PDFs and images. Provide a URL and optionally specify the document type ('pdf', 'image', or 'auto').",
1505
+ "func": process_online_document
1506
  }
1507
  ]