File size: 1,435 Bytes
0ea40d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41

# ./tools.py

"""
Research & Extraction Engine.

This module handles web search via Tavily and the parsing of uploaded files
(PDFs, Word documents, Python scripts, and other plain-text formats) into text.
"""

import os
from tavily import TavilyClient
from pypdf import PdfReader
import docx

# Create the shared Tavily client once, at import time. The API key is read
# from the TAVILY_API_KEY environment variable; os.getenv returns None when the
# variable is unset.
# NOTE(review): confirm how TavilyClient behaves when api_key is None.
tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

def web_search(query: str):
    """Run an advanced-depth Tavily search and return the hits as one string.

    The query is sent through the module-level ``tavily`` client, requesting at
    most five results. Each entry of the response's ``results`` list is rendered
    as a ``Source: <url>`` line followed by a ``Content: <content>`` line, and
    the rendered entries are joined with newlines.
    """
    response = tavily.search(query=query, search_depth="advanced", max_results=5)
    entries = []
    for hit in response['results']:
        entries.append(f"Source: {hit['url']}\nContent: {hit['content']}")
    return "\n".join(entries)

def parse_file(file_path) -> str:
    """Extract text from a file so it can be handed to the LLM.

    The result always starts with a ``--- File: <basename> ---`` header line
    and ends with a ``---`` footer line. What goes in between depends on the
    (case-insensitive) file extension:

    * ``.pdf``  -- text of every page, pages separated by a newline.
    * ``.docx`` -- text of every paragraph, paragraphs separated by a newline.
    * ``.py``, ``.txt``, ``.md``, ``.html``, ``.js``, ``.yaml``, ``.toml`` --
      the file contents decoded as UTF-8; bytes that are not valid UTF-8 are
      replaced with U+FFFD instead of raising ``UnicodeDecodeError``.
    * anything else -- a fixed "unsupported format" placeholder.

    Args:
        file_path: Path of the file to read.

    Returns:
        The header, the extracted text, and the footer as a single string.

    Raises:
        OSError: If a supported file cannot be opened (e.g. it does not exist).
    """
    plain_text_exts = (".py", ".txt", ".md", ".html", ".js", ".yaml", ".toml")
    ext = os.path.splitext(file_path)[-1].lower()
    header = f"--- File: {os.path.basename(file_path)} ---\n"

    if ext == ".pdf":
        reader = PdfReader(file_path)
        # Join pages with a newline so the last word of one page is not fused
        # with the first word of the next (mirrors the .docx branch). The
        # `or ""` guard keeps the join safe if a page yields no text object.
        body = "\n".join(page.extract_text() or "" for page in reader.pages)
    elif ext == ".docx":
        doc = docx.Document(file_path)
        body = "\n".join(para.text for para in doc.paragraphs)
    elif ext in plain_text_exts:
        # Uploaded files are not guaranteed to be valid UTF-8; substitute bad
        # bytes rather than failing the whole extraction on a single one.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            body = f.read()
    else:
        body = "[Non-text file detected or unsupported format]"

    return f"{header}{body}\n---\n"