# ./tools.py
"""
The Research & Extraction Engine.

This module handles "Web Search" via Tavily and the parsing of uploaded
files (PDFs, Python scripts, etc.) into plain text.
"""
import os

from tavily import TavilyClient
from pypdf import PdfReader
import docx

# Initialize Tavily
tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

# File extensions that parse_file reads verbatim as text.
_TEXT_EXTENSIONS = frozenset(
    {".py", ".txt", ".md", ".html", ".js", ".yaml", ".toml"}
)


def web_search(query: str) -> str:
    """Perform a technical search for documentation or latest AI trends.

    Calls ``tavily.search`` with ``search_depth="advanced"`` and
    ``max_results=5`` and flattens the hits into one text block.

    Args:
        query: The search query passed to Tavily.

    Returns:
        One ``Source: <url>`` / ``Content: <content>`` pair per result,
        joined by newlines. Empty string when there are no results.

    Raises:
        KeyError: If the response has no ``"results"`` key, or a result
            lacks ``"url"`` or ``"content"``.
    """
    search_result = tavily.search(
        query=query, search_depth="advanced", max_results=5
    )
    return "\n".join(
        f"Source: {r['url']}\nContent: {r['content']}"
        for r in search_result["results"]
    )


def parse_file(file_path) -> str:
    """Extract text from various file formats for the LLM to process.

    The format is chosen from the file extension (case-insensitive):
    ``.pdf`` is read with ``PdfReader``, ``.docx`` with ``docx.Document``,
    and the extensions in ``_TEXT_EXTENSIONS`` are read as UTF-8 text.
    Any other extension yields a placeholder message instead of content.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text, preceded by a ``--- File: <basename> ---``
        header line and followed by a ``---`` footer line.
    """
    ext = os.path.splitext(file_path)[1].lower()
    header = f"--- File: {os.path.basename(file_path)} ---\n"

    if ext == ".pdf":
        reader = PdfReader(file_path)
        # Separate pages with a newline so the last word of one page does
        # not fuse with the first word of the next; `or ""` tolerates a
        # page that yields no text.
        body = "\n".join(page.extract_text() or "" for page in reader.pages)
    elif ext == ".docx":
        doc = docx.Document(file_path)
        body = "\n".join(para.text for para in doc.paragraphs)
    elif ext in _TEXT_EXTENSIONS:
        # errors="replace": an upload that is not valid UTF-8 still
        # produces usable text instead of raising UnicodeDecodeError.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            body = f.read()
    else:
        body = "[Non-text file detected or unsupported format]"

    return f"{header}{body}\n---\n"