Spaces:
Runtime error
Runtime error
| from typing import Optional | |
| from dataclasses import dataclass | |
| import openai | |
| import google.generativeai as genai | |
| from config import config | |
| from vector_store import vector_store, SearchResult | |
| SYSTEM_PROMPT = """You are a research assistant that answers questions based on the provided document context. | |
| Instructions: | |
| 1. Answer ONLY based on the provided context | |
| 2. If the context doesn't contain enough information, say so | |
| 3. Cite sources by mentioning the paper name and section | |
| 4. Be concise but thorough | |
| 5. Format responses with markdown for readability""" | |
| class QueryResponse: | |
| answer: str | |
| sources: list[dict] | |
| arxiv_fetched: Optional[str] = None | |
| class DocumentAgent: | |
| def __init__(self): | |
| self.conversation_history: list[dict] = [] | |
| if config.GEMINI_API_KEY: | |
| genai.configure(api_key=config.GEMINI_API_KEY) | |
| def query(self, question: str, paper_filter: Optional[str] = None) -> QueryResponse: | |
| results = vector_store.search(question, paper_filter=paper_filter) | |
| if not results: | |
| return QueryResponse( | |
| answer="I don't have any documents indexed yet. Please upload a PDF first.", | |
| sources=[] | |
| ) | |
| context = self._build_context(results) | |
| answer = self._generate_answer(question, context) | |
| sources = self._format_sources(results) | |
| return QueryResponse(answer=answer, sources=sources) | |
| def _build_context(self, results: list[SearchResult]) -> str: | |
| context_parts = [] | |
| for r in results: | |
| context_parts.append( | |
| f"[Source: {r.chunk.paper_name} | Section: {r.chunk.section_title}]\n{r.chunk.content}" | |
| ) | |
| return "\n\n---\n\n".join(context_parts) | |
| def _generate_answer(self, question: str, context: str) -> str: | |
| messages = [ | |
| {"role": "system", "content": SYSTEM_PROMPT}, | |
| {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"} | |
| ] | |
| if config.OPENROUTER_API_KEY: | |
| try: | |
| client = openai.OpenAI( | |
| api_key=config.OPENROUTER_API_KEY, | |
| base_url=config.OPENROUTER_BASE_URL | |
| ) | |
| response = client.chat.completions.create( | |
| model=config.OPENROUTER_MODEL, | |
| messages=messages, | |
| max_tokens=1500, | |
| temperature=0.3 | |
| ) | |
| return response.choices[0].message.content | |
| except Exception: | |
| pass | |
| if config.GEMINI_API_KEY: | |
| try: | |
| model = genai.GenerativeModel(config.GEMINI_MODEL) | |
| prompt = f"{SYSTEM_PROMPT}\n\nContext:\n{context}\n\nQuestion: {question}" | |
| response = model.generate_content(prompt) | |
| return response.text | |
| except Exception: | |
| pass | |
| return "Unable to generate response. Please check API configuration." | |
| def _format_sources(self, results: list[SearchResult]) -> list[dict]: | |
| seen = set() | |
| sources = [] | |
| for r in results: | |
| key = f"{r.chunk.paper_name}:{r.chunk.section_title}" | |
| if key not in seen: | |
| seen.add(key) | |
| sources.append({ | |
| "paper": r.chunk.paper_name, | |
| "section": r.chunk.section_title or "Content" | |
| }) | |
| return sources | |
| def clear_history(self): | |
| self.conversation_history = [] | |
| agent = DocumentAgent() | |