Spaces:
Runtime error
Runtime error
| import os | |
| import io | |
| import requests | |
| import logging | |
| from typing import Optional | |
| from agents import function_tool | |
| from docx import Document | |
| import PyPDF2 | |
| from .firebase_config import db | |
| # Set up logging | |
| logger = logging.getLogger(__name__) | |
def read_document_data(query: str, source: str = "auto") -> str:
    """
    Read document content from local files and/or Firebase Firestore.

    Args:
        query: The search query or topic the caller is interested in. It is
            logged, echoed in the "nothing found" message and forwarded to
            the reader helpers.
        source: Data source - "local" for local files, "firestore" for
            Firebase, or "auto" to try local files first and fall back to
            Firestore only when the local files produced nothing. Matching
            ignores case and surrounding whitespace; ``None`` is treated as
            "auto".

    Returns:
        The content gathered from the selected source(s), each section
        introduced by a "=== ... ===" header; a "No relevant information
        found" message when nothing was gathered; or an explanatory message
        when ``source`` is not one of the supported values.
    """
    logger.info(
        "TOOL CALL: read_document_data called with query='%s', source='%s'",
        query,
        source,
    )

    # Tool callers are not always consistent about case/whitespace, and an
    # unrecognised value used to fall through silently to "nothing found".
    normalized = str(source or "auto").strip().lower()
    if normalized not in ("local", "firestore", "auto"):
        response = (
            f"Unknown source '{source}'. "
            "Expected one of: 'local', 'firestore', 'auto'."
        )
        logger.warning(
            "TOOL RESULT: read_document_data rejected unknown source='%s'",
            source,
        )
        return response

    result = []

    # Local files are read for "local" and "auto".
    if normalized in ("local", "auto"):
        local_content = _read_local_documents(query)
        if local_content:
            result.append(f"=== Local Documents ===\n{local_content}")

    # Firestore is read when explicitly requested, or as the "auto" fallback
    # when the local files yielded nothing.
    if normalized == "firestore" or (normalized == "auto" and not result):
        firestore_content = _read_firestore_documents(query)
        if firestore_content:
            result.append(f"=== Firestore Documents ===\n{firestore_content}")

    if result:
        response = "\n\n".join(result)
        logger.info(
            "TOOL RESULT: read_document_data found %d result(s)", len(result)
        )
        return response

    response = (
        f"No relevant information found for query: '{query}'. "
        "Please check if documents are available."
    )
    logger.info(
        "TOOL RESULT: read_document_data found no results for query='%s'",
        query,
    )
    return response
def _read_local_documents(query: str) -> Optional[str]:
    """Read the local ``data.docx`` and every PDF in the root directory.

    The root directory is the parent of the directory that contains this
    module.

    Args:
        query: Accepted for symmetry with the other readers; this function
            does not use it to filter the extracted text.

    Returns:
        The extracted text of all documents, each section introduced by a
        ``[From <file>]`` header and separated by blank lines, or ``None``
        when no section was produced. A file that cannot be read contributes
        an ``Error reading <file>: ...`` line instead of raising.
    """
    # abspath() protects against a relative __file__: without it the double
    # dirname() can collapse to "" and os.listdir("") raises.
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    content_parts = []

    # DOCX: only the fixed file name "data.docx" is considered.
    docx_path = os.path.join(root_dir, "data.docx")
    if os.path.exists(docx_path):
        try:
            doc = Document(docx_path)
            paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
            docx_content = "\n".join(paragraphs)
            if docx_content:
                content_parts.append(f"[From data.docx]\n{docx_content}")
        except Exception as e:
            # Best effort: report the failure as text and keep going.
            content_parts.append(f"Error reading data.docx: {str(e)}")

    # PDF: sorted() makes the section order deterministic (os.listdir order
    # is arbitrary); the extension check is case-insensitive so "X.PDF" is
    # picked up as well.
    for file in sorted(os.listdir(root_dir)):
        if not file.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(root_dir, file)
        try:
            pdf_text = []
            # Pages are extracted while the file is still open.
            with open(pdf_path, "rb") as pdf_file:
                pdf_reader = PyPDF2.PdfReader(pdf_file)
                for page in pdf_reader.pages:
                    # Defensive: treat a missing extraction result as empty.
                    text = page.extract_text() or ""
                    if text.strip():
                        pdf_text.append(text)
            if pdf_text:
                content_parts.append(f"[From {file}]\n" + "\n".join(pdf_text))
        except Exception as e:
            content_parts.append(f"Error reading {file}: {str(e)}")

    return "\n\n".join(content_parts) if content_parts else None
def _read_firestore_documents(query: str) -> Optional[str]:
    """Gather text from every document in the Firestore 'data' collection.

    For each Firestore document:
      * if its ``document`` field is set, that value is treated as a URL and
        the file behind it is downloaded and read;
      * otherwise the first non-empty field among ``content``, ``text`` and
        ``data`` is used as the text.

    Args:
        query: Not used by this function.

    Returns:
        The collected sections joined by blank lines, ``None`` when no
        section was produced, or a plain-text message when Firestore is not
        initialized or reading the collection fails. A failure on a single
        document is recorded as an ``[Error reading <id>]`` section.
    """
    if not db:
        return "Firebase Firestore is not initialized. Please check your serviceAccount.json file."

    try:
        sections = []
        for snapshot in db.collection("data").stream():
            fields = snapshot.to_dict()
            label = fields.get("name", snapshot.id)
            file_url = fields.get("document")

            if not file_url:
                # No linked file: fall back to text stored on the document.
                inline_text = (
                    fields.get("content")
                    or fields.get("text")
                    or fields.get("data")
                )
                if inline_text:
                    sections.append(f"[From Firestore: {label}]\n{inline_text}")
                continue

            # Linked file: a failure here must not abort the other documents.
            try:
                downloaded = _read_document_from_url(file_url, label)
            except Exception as err:
                sections.append(f"[Error reading {snapshot.id}]: {err}")
            else:
                if downloaded:
                    sections.append(f"[From Firestore: {label}]\n{downloaded}")

        if not sections:
            return None
        return "\n\n".join(sections)
    except Exception as err:
        return f"Error reading from Firestore: {err}"
| def _read_document_from_url(url: str, doc_name: str) -> Optional[str]: | |
| """Download and read a document (DOCX or PDF) from a URL.""" | |
| try: | |
| # Download the file from URL | |
| response = requests.get(url, timeout=30) | |
| response.raise_for_status() | |
| # Determine file type from URL | |
| if url.lower().endswith('.docx') or 'docx' in url.lower(): | |
| # Read DOCX from bytes | |
| doc = Document(io.BytesIO(response.content)) | |
| full_text = [] | |
| for paragraph in doc.paragraphs: | |
| if paragraph.text.strip(): | |
| full_text.append(paragraph.text) | |
| return "\n".join(full_text) | |
| elif url.lower().endswith('.pdf') or 'pdf' in url.lower(): | |
| # Read PDF from bytes | |
| pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content)) | |
| pdf_text = [] | |
| for page in pdf_reader.pages: | |
| text = page.extract_text() | |
| if text.strip(): | |
| pdf_text.append(text) | |
| return "\n".join(pdf_text) | |
| else: | |
| return f"Unsupported file type for URL: {url}" | |
| except Exception as e: | |
| raise Exception(f"Failed to download/read document from {url}: {str(e)}") | |
def list_available_documents() -> str:
    """
    List all available documents from both local storage and Firestore.

    Local documents are ``data.docx`` (if present) and every PDF in the
    parent of the directory that contains this module. Firestore documents
    are listed by document ID from the 'data' collection, and only when the
    Firestore client ``db`` is available.

    Returns:
        A formatted list of available documents grouped by source, an error
        line if listing Firestore fails, or "No documents found in any
        source." when nothing was found.
    """
    logger.info("TOOL CALL: list_available_documents called")
    result = []

    # abspath() protects against a relative __file__: without it the double
    # dirname() can collapse to "" and os.listdir("") raises.
    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    local_docs = []
    if os.path.exists(os.path.join(root_dir, "data.docx")):
        local_docs.append("- data.docx")
    # Sorted for a stable listing; case-insensitive so "X.PDF" is included.
    local_docs.extend(
        f"- {file}"
        for file in sorted(os.listdir(root_dir))
        if file.lower().endswith(".pdf")
    )
    if local_docs:
        result.append("=== Local Documents ===\n" + "\n".join(local_docs))

    # Firestore is skipped silently when the client is not initialized.
    if db:
        try:
            firestore_docs = [
                f"- {doc.id}" for doc in db.collection("data").stream()
            ]
            if firestore_docs:
                result.append(
                    "=== Firestore Documents ===\n" + "\n".join(firestore_docs)
                )
        except Exception as e:
            # Best effort: report the failure in the listing instead of raising.
            result.append(f"Error listing Firestore documents: {str(e)}")

    response = "\n\n".join(result) if result else "No documents found in any source."
    logger.info(
        "TOOL RESULT: list_available_documents found %d source(s) with documents",
        len(result),
    )
    return response