Spaces:
Running
Running
| # ./tools.py | |
| """ | |
| The Research & Extraction Engine: this module handles web search via Tavily and extracts text from uploaded files (PDFs, Word documents, source code, and other plain-text formats). | |
| """ | |
| import os | |
| from tavily import TavilyClient | |
| from pypdf import PdfReader | |
| import docx | |
| # Initialize the shared Tavily client; the API key is read from the TAVILY_API_KEY environment variable. | |
| tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY")) | |
def web_search(query: str):
    """Run an advanced-depth Tavily search and flatten the hits into one string.

    Args:
        query: The search text forwarded to Tavily.

    Returns:
        A single string in which every result contributes a
        ``Source: <url>`` line followed by a ``Content: <content>`` line;
        entries are separated by newlines. At most five results are
        requested from Tavily.
    """
    response = tavily.search(query=query, search_depth="advanced", max_results=5)

    # Build one "Source/Content" entry per hit, then glue them together.
    entries = []
    for hit in response['results']:
        entries.append(f"Source: {hit['url']}\nContent: {hit['content']}")
    return "\n".join(entries)
def parse_file(file_path):
    """Extract text from a file so it can be passed to the LLM.

    The format is selected from the file extension (case-insensitive):
    ``.pdf`` is read with ``PdfReader``, ``.docx`` with ``docx.Document``,
    and a fixed set of source/text extensions is read as UTF-8 text.

    Args:
        file_path: Path of the file to read.

    Returns:
        A string consisting of a ``--- File: <basename> ---`` header line,
        the extracted text, and a closing ``---`` line. For any other
        extension the text is replaced by a placeholder message and the
        file is not opened.
    """
    plain_text_exts = {".py", ".txt", ".md", ".html", ".js", ".yaml", ".toml"}

    ext = os.path.splitext(file_path)[-1].lower()
    header = f"--- File: {os.path.basename(file_path)} ---\n"

    if ext == ".pdf":
        reader = PdfReader(file_path)
        # Single join instead of repeated `+=`; the `or ""` guard keeps a
        # page that yields no text from breaking the concatenation.
        body = "".join(page.extract_text() or "" for page in reader.pages)
    elif ext == ".docx":
        doc = docx.Document(file_path)
        body = "\n".join(para.text for para in doc.paragraphs)
    elif ext in plain_text_exts:
        # errors="replace": an uploaded file that is not valid UTF-8 would
        # otherwise raise UnicodeDecodeError and abort the whole extraction;
        # undecodable bytes become U+FFFD and the rest of the text survives.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            body = f.read()
    else:
        body = "[Non-text file detected or unsupported format]"

    return f"{header}{body}\n---\n"