Spaces:
Running
Running
File size: 1,435 Bytes
0ea40d5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# ./tools.py
"""
The Research & Extraction Engine: this module handles web search via Tavily and parses uploaded files (PDFs, Word documents, Python scripts, etc.) into plain text.
"""
import os
from tavily import TavilyClient
from pypdf import PdfReader
import docx
# Module-level Tavily client used by web_search. The API key is read from the
# TAVILY_API_KEY environment variable (None is passed when it is unset).
tavily = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))
def web_search(query: str):
    """Search the web through Tavily and flatten the hits into one string.

    Intended for technical lookups such as documentation or recent AI trends.

    Args:
        query: Search string forwarded to the Tavily client.

    Returns:
        A single string in which each result contributes a ``Source: <url>``
        line followed by a ``Content: <content>`` line, with results joined
        by newlines. The string is empty when no results come back.
    """
    # Advanced search depth, capped at five results.
    response = tavily.search(query=query, search_depth="advanced", max_results=5)

    formatted_hits = []
    for hit in response['results']:
        formatted_hits.append(f"Source: {hit['url']}\nContent: {hit['content']}")
    return "\n".join(formatted_hits)
def parse_file(file_path) -> str:
    """Extract text from a supported file so it can be handed to the LLM.

    The format is chosen from the (case-insensitive) file extension:

    * ``.pdf``  - text of every page, one page per line group.
    * ``.docx`` - text of every paragraph, newline separated.
    * ``.py``, ``.txt``, ``.md``, ``.html``, ``.js``, ``.yaml``, ``.toml`` -
      the file content decoded as UTF-8.
    * anything else - a fixed placeholder message.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text wrapped between a ``--- File: <basename> ---``
        header line and a closing ``---`` line.

    Raises:
        OSError: If a plain-text file cannot be opened. Errors raised by the
            PDF/DOCX readers propagate unchanged.
    """
    ext = os.path.splitext(file_path)[1].lower()
    header = f"--- File: {os.path.basename(file_path)} ---\n"

    if ext == ".pdf":
        reader = PdfReader(file_path)
        # Join pages with a newline so the last word of one page does not
        # fuse with the first word of the next; `or ""` guards against a
        # page that yields no text.
        body = "\n".join(page.extract_text() or "" for page in reader.pages)
    elif ext == ".docx":
        doc = docx.Document(file_path)
        body = "\n".join(para.text for para in doc.paragraphs)
    elif ext in {".py", ".txt", ".md", ".html", ".js", ".yaml", ".toml"}:
        # Uploaded files are not guaranteed to be valid UTF-8; substitute
        # undecodable bytes with U+FFFD instead of raising UnicodeDecodeError.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            body = f.read()
    else:
        body = "[Non-text file detected or unsupported format]"

    return f"{header}{body}\n---\n"
|