Spaces:

prashantmatlani
/

coderg

Running

coderg / tools.py

initial commit

0ea40d5 2 days ago

1.44 kB


	# ./tools.py

	"""
	The Research & Extraction Engine: this module performs web searches via Tavily and extracts text from uploaded files (PDF, DOCX, and plain-text formats such as Python scripts).
	"""

	import os
	from tavily import TavilyClient
	from pypdf import PdfReader
	import docx

	# Create the shared Tavily client once, at import time. The API key is read
	# from the TAVILY_API_KEY environment variable; os.getenv returns None when
	# the variable is unset, so that None is what gets passed to the client.
	tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

	def web_search(query: str):
	    """Search the web through Tavily and flatten the hits into one string.

	    The request uses ``search_depth="advanced"`` and asks for at most five
	    results via the module-level ``tavily`` client.

	    Args:
	        query: The search query forwarded to Tavily.

	    Returns:
	        One "Source: <url>" line followed by one "Content: <content>" line
	        per result, with results separated by a newline.
	    """
	    response = tavily.search(
	        query=query,
	        search_depth="advanced",
	        max_results=5,
	    )

	    # Build one two-line entry per hit, then join them in result order.
	    entries = []
	    for hit in response['results']:
	        entries.append(f"Source: {hit['url']}\nContent: {hit['content']}")
	    return "\n".join(entries)

	def parse_file(file_path):
	    """Extract text from a file so it can be handed to the LLM.

	    The extraction strategy is chosen from the file extension, compared
	    case-insensitively:

	    * ``.pdf``: text of every page from ``PdfReader``, pages separated by a
	      newline so words at a page boundary are not fused together.
	    * ``.docx``: the text of each paragraph from ``docx.Document``, joined
	      with newlines.
	    * ``.py``, ``.txt``, ``.md``, ``.html``, ``.js``, ``.yaml``, ``.toml``:
	      read as UTF-8 text. Bytes that are not valid UTF-8 are replaced with
	      U+FFFD instead of raising ``UnicodeDecodeError``.
	    * anything else: a fixed "unsupported format" placeholder.

	    Args:
	        file_path: Path of the file to read.

	    Returns:
	        A string made of a "--- File: <basename> ---" header line, the
	        extracted text, and a closing "---" footer line.

	    Raises:
	        OSError: If a plain-text file cannot be opened. Exceptions raised by
	            the PDF or DOCX readers propagate unchanged.
	    """
	    ext = os.path.splitext(file_path)[-1].lower()
	    header = f"--- File: {os.path.basename(file_path)} ---\n"

	    if ext == ".pdf":
	        reader = PdfReader(file_path)
	        # `or ""` guards against a page whose extraction yields None/empty,
	        # which would otherwise break the join.
	        body = "\n".join(page.extract_text() or "" for page in reader.pages)
	    elif ext == ".docx":
	        doc = docx.Document(file_path)
	        body = "\n".join(para.text for para in doc.paragraphs)
	    elif ext in {".py", ".txt", ".md", ".html", ".js", ".yaml", ".toml"}:
	        # errors="replace": one stray non-UTF-8 byte (e.g. a Latin-1 file)
	        # should not abort the whole upload; it shows up as U+FFFD instead.
	        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
	            body = f.read()
	    else:
	        body = "[Non-text file detected or unsupported format]"

	    return f"{header}{body}\n---\n"