# (Removed: HuggingFace Spaces page-scrape artifact — "Spaces: / Sleeping" header, not source code.)
"""Unified application with toggle between Simple and Multi-Agent modes."""
import gradio as gr
import asyncio
import logging
from pathlib import Path
from typing import Tuple

# Import spaces for HuggingFace GPU decorator
try:
    import spaces
    HF_SPACES = True
except ImportError:
    # Not running on HuggingFace Spaces (e.g. local development) — the
    # GPU warmup hook below is simply skipped.
    HF_SPACES = False

# Simple mode imports
from openai import OpenAI
from src.config import get_config
from src.document_processing.loader import MarkdownDocumentLoader
from src.document_processing.chunker import SemanticChunker
from src.indexing.memory_indexer import MemoryDocumentIndexer
from src.retrieval.memory_retriever import MemoryRetriever
# NOTE(review): ChunkFormatter appears unused in this module — confirm before removing.
from src.ui.formatters import ChunkFormatter

# Multi-agent mode imports - completely lazy, only import when needed
MULTI_AGENT_AVAILABLE = True  # Assume available, will check when needed

# Module-level logging; INFO so startup progress shows in the Spaces log.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Dummy GPU function to satisfy HuggingFace Spaces GPU check.
# BUGFIX: ZeroGPU Spaces require at least one function decorated with
# @spaces.GPU; the undecorated original did not actually satisfy that check.
if HF_SPACES:
    @spaces.GPU
    def _hf_spaces_gpu_warmup():
        """Dummy function to satisfy HF Spaces GPU decorator requirement."""
        return "GPU ready"
class UnifiedAssistant:
    """Unified assistant supporting both simple and multi-agent modes.

    Simple mode: one retrieval pass plus a single chat-completion call (fast).
    Multi-agent mode: a lazily-constructed orchestrator pipeline
    (intent -> compose -> fact-check; slower but higher quality).
    """

    def __init__(self):
        """Load config, build the document index and retriever.

        The multi-agent orchestrator is NOT built here — it is created on
        first use (see _get_orchestrator) to keep startup fast.
        """
        self.config = get_config()
        self.client = OpenAI(api_key=self.config.llm.api_key)
        # Load documents (shared by both modes)
        self.indexer = MemoryDocumentIndexer(llm_config=self.config.llm)
        self._load_or_create_documents()
        # Initialize retriever (for simple mode)
        self.retriever = MemoryRetriever(
            document_store=self.indexer.document_store,
            llm_config=self.config.llm,
            retrieval_config=self.config.retrieval,
        )
        # Orchestrator (multi-agent mode) — lazy, see _get_orchestrator().
        self.orchestrator = None

    @staticmethod
    def _format_documents_html(documents) -> str:
        """Render retrieved documents as a list of collapsible HTML cards.

        BUGFIX: the original definition lacked ``self`` (and was not a
        staticmethod), so every ``self._format_documents_html(docs)`` call
        raised ``TypeError: takes 1 positional argument but 2 were given``.

        Args:
            documents: Haystack-like documents. Only ``content`` is required;
                ``meta`` and ``score`` are read defensively.

        Returns:
            An HTML string, or a placeholder paragraph when empty.
        """
        import html  # stdlib; local import matches the file's lazy-import style

        if not documents:
            return "<p>No documents retrieved.</p>"
        html_parts = []
        for i, doc in enumerate(documents, 1):
            source = doc.meta.get("source_file", "Unknown") if hasattr(doc, 'meta') and doc.meta else "Unknown"
            # BUGFIX: Haystack documents may carry score=None (attribute exists
            # but is unset), which getattr's default does not catch and which
            # would make the ``:.3f`` format below raise TypeError.
            score = getattr(doc, 'score', 0.0) or 0.0
            content = doc.content if hasattr(doc, 'content') else str(doc)
            # Truncate long content for the preview line
            preview = content[:200] + "..." if len(content) > 200 else content
            # Escape document-derived text so it cannot inject markup into the UI.
            source = html.escape(str(source))
            preview = html.escape(preview)
            content = html.escape(content)
            html_parts.append(f"""
<div style="border: 1px solid #ddd; border-radius: 8px; padding: 16px; margin-bottom: 16px; background: #f9f9f9; color: #1f2937;">
    <div style="margin-bottom: 12px; color: #1f2937;">
        <strong>#{i}</strong> |
        <span style="color: #2563eb;">📄 {source}</span> |
        <span style="color: #666;">Score: {score:.3f}</span>
    </div>
    <details>
        <summary style="cursor: pointer; padding: 8px; background: white; border-radius: 4px; margin-bottom: 8px; color: #1f2937;">
            <strong>Preview:</strong> {preview}
        </summary>
        <div style="padding: 12px; background: white; border-radius: 4px; margin-top: 8px; white-space: pre-wrap; font-size: 0.9em; color: #1f2937;">
            {content}
        </div>
    </details>
</div>
""")
        return "".join(html_parts)

    def _load_or_create_documents(self):
        """Load pre-embedded documents from JSON, or chunk-and-index fresh.

        Prefers data/embedded_documents.json (skips re-embedding). Any failure
        there falls through to a full load -> chunk -> index pass.
        """
        import json
        from haystack import Document as HaystackDoc
        json_path = Path("data/embedded_documents.json")
        if json_path.exists():
            logger.info(f"Loading embedded documents from {json_path}...")
            try:
                with open(json_path, "r") as f:
                    docs_data = json.load(f)
                documents = []
                for doc_data in docs_data:
                    doc = HaystackDoc(
                        id=doc_data.get("id"),
                        content=doc_data["content"],
                        embedding=doc_data.get("embedding"),
                        meta=doc_data.get("meta", {}),
                    )
                    documents.append(doc)
                self.indexer.document_store.write_documents(documents)
                logger.info(f"Loaded {len(documents)} documents with embeddings")
                return
            except Exception as e:
                # Deliberate best-effort: fall back to building a fresh index.
                logger.warning(f"Failed to load documents: {e}")
        # Create documents if not found
        logger.info("Creating fresh document index...")
        loader = MarkdownDocumentLoader(self.config.document_processing.documents_path)
        documents = loader.load_documents()
        chunker = SemanticChunker(
            chunk_size=self.config.document_processing.chunk_size,
            chunk_overlap=self.config.document_processing.chunk_overlap,
            min_chunk_size=self.config.document_processing.min_chunk_size,
        )
        chunked_docs = chunker.chunk_documents(documents)
        self.indexer.index_documents(chunked_docs)

    def _get_orchestrator(self):
        """Lazily build and cache the multi-agent orchestrator.

        Raises:
            RuntimeError: if the multi-agent dependencies cannot be imported.
        """
        if self.orchestrator is None:
            try:
                from src.pipeline.memory_orchestrator import MemoryRAGOrchestrator
                self.orchestrator = MemoryRAGOrchestrator(
                    config=self.config,
                    document_indexer=self.indexer,  # reuses the shared index
                )
            except ImportError as e:
                raise RuntimeError(f"Multi-agent mode is not available: {e}")
        return self.orchestrator

    def process_query_simple(self, query: str) -> Tuple[str, str, str]:
        """Process query with simple single-LLM mode (fast).

        Returns:
            (email_text, chunks_html, metadata_markdown)
        """
        logger.info(f"[SIMPLE MODE] Processing query: {query}")
        # Retrieve documents
        retrieved_docs = self.retriever.retrieve(query)
        logger.info(f"Retrieved {len(retrieved_docs)} documents")
        # Build context. GPT-5 models get a smaller budget because reasoning
        # tokens compete with the completion for max_completion_tokens.
        max_docs = 2 if "gpt-5" in self.config.llm.model_name else 3
        max_chars_per_doc = 800 if "gpt-5" in self.config.llm.model_name else 1500
        context_parts = []
        for i, doc in enumerate(retrieved_docs[:max_docs], 1):
            source = doc.meta.get("source_file", "Unknown")
            content = doc.content[:max_chars_per_doc]
            context_parts.append(f"[Dokument {i}: {source}]\n{content}\n")
        context = "\n".join(context_parts) if context_parts else "Keine relevanten Dokumente gefunden."
        # Prompts are intentionally German (target audience) — do not translate.
        system_prompt = """Du bist ein hilfreicher Assistent für die Studienadministration der BFH.
Deine Aufgabe ist es, professionelle E-Mail-Antworten auf Studentenanfragen zu verfassen.
Richtlinien:
- Antworte in der gleichen Sprache wie die Anfrage (Deutsch, Englisch oder Französisch)
- Verwende einen professionellen aber freundlichen Ton
- Sei klar, präzise und hilfreich
- Beziehe dich auf konkrete Formulare, Fristen oder Verfahren wenn relevant
- Gib klare nächste Schritte an
- Wenn Informationen fehlen, sage dies ehrlich
Für deutsche E-Mails:
- Verwende die formelle "Sie"-Form
- Grußformel: "Guten Tag" oder "Sehr geehrte/r..."
- Schlussformel: "Freundliche Grüsse" oder "Mit freundlichen Grüssen"
"""
        user_prompt = f"""Beantworte die folgende Anfrage basierend auf den verfügbaren Informationen:
Anfrage: {query}
Verfügbare Informationen:
{context}
Verfasse eine vollständige professionelle E-Mail-Antwort."""
        try:
            # GPT-5 models have different parameter requirements
            completion_params = {
                "model": self.config.llm.model_name,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
            }
            if "gpt-5" in self.config.llm.model_name:
                # GPT-5 uses max_completion_tokens; temperature must stay at
                # its default (1.0), so it is deliberately not set here.
                completion_params["max_completion_tokens"] = self.config.llm.max_tokens
                # Minimal reasoning effort so tokens go to output, not reasoning.
                completion_params["reasoning_effort"] = "minimal"
            else:
                completion_params["max_tokens"] = self.config.llm.max_tokens
                completion_params["temperature"] = self.config.llm.temperature
            response = self.client.chat.completions.create(**completion_params)
            # Full-response dumps belong at DEBUG level, not INFO.
            logger.debug(f"Response object: {response}")
            logger.debug(f"Response.choices: {response.choices}")
            email = response.choices[0].message.content
            if email is None or email.strip() == "":
                # Empty completions can occur when reasoning consumes the budget.
                logger.error("LLM returned null or empty response!")
                logger.debug(f"Full response: {response.model_dump()}")
                email = "Error: The model returned an empty response. Please try again."
        except Exception as e:
            # Surface the failure in the UI instead of crashing the handler.
            logger.error(f"Error generating email: {e}")
            email = f"Error generating response: {str(e)}"
        # Format chunks for display
        chunks_html = self._format_documents_html(retrieved_docs)
        # Create metadata
        metadata = f"""**Mode**: Simple (Single LLM call)
**Model**: {self.config.llm.model_name}
**Documents Retrieved**: {len(retrieved_docs)}
**Documents Used**: {min(len(retrieved_docs), max_docs)}
"""
        return email, chunks_html, metadata

    async def process_query_multi_agent(self, query: str) -> Tuple[str, str, str]:
        """Process query with multi-agent mode (high quality, async parallel).

        Returns:
            (email_text, chunks_html, metadata_markdown)
        """
        logger.info(f"[MULTI-AGENT MODE] Processing query: {query}")
        orchestrator = self._get_orchestrator()
        result = await orchestrator.process_query(query)
        # Format email
        email = f"""Subject: {result.email_draft.subject}
{result.email_draft.body}"""
        # result.retrieved_docs are plain dicts — convert back to Haystack
        # documents so the shared HTML formatter can render them.
        from haystack import Document as HaystackDoc
        docs = []
        for doc_dict in result.retrieved_docs:
            if isinstance(doc_dict, dict):
                doc = HaystackDoc(
                    content=doc_dict.get('content', ''),
                    meta=doc_dict.get('meta', {}),
                    id=doc_dict.get('id'),
                )
                if 'score' in doc_dict:
                    doc.score = doc_dict['score']
                docs.append(doc)
        chunks_html = self._format_documents_html(docs)
        # Create metadata
        mode_type = "Parallel ⚡" if self.config.use_parallel_processing else "Sequential 🐌"
        metadata = f"""**Mode**: Multi-Agent ({mode_type})
**Model**: {self.config.llm.model_name}
**Processing Time**: {result.processing_time:.1f}s
**Documents Retrieved**: {len(result.retrieved_docs)}
**Intent Detected**:
- Action: {result.intent.action_type}
- Topic: {result.intent.topic}
- Language: {result.intent.language}
**Fact Check**:
- Accuracy: {result.fact_check.accuracy_score:.0%}
- Status: {'✓ Accurate' if result.fact_check.is_accurate else '⚠ Issues Found'}
- Issues: {len(result.fact_check.issues_found)}
"""
        if result.fact_check.issues_found:
            metadata += "\n**Issues**:\n"
            for issue in result.fact_check.issues_found[:3]:  # Show first 3
                metadata += f"- {issue}\n"
        return email, chunks_html, metadata

    def process_query(self, query: str, mode: str) -> Tuple[str, str, str]:
        """Dispatch a query to the selected processing mode.

        Args:
            query: The user's question (may be empty/whitespace).
            mode: "Simple (Fast)" or anything else for multi-agent.

        Returns:
            (email_text, chunks_html, metadata_markdown); errors are returned
            as text so the Gradio handler never raises.
        """
        if not query or not query.strip():
            return "Please enter a query.", "", ""
        try:
            if mode == "Simple (Fast)":
                return self.process_query_simple(query)
            else:  # Multi-Agent
                # asyncio.run spins up a fresh event loop per call; Gradio
                # invokes this sync handler off the main loop, so that is safe.
                return asyncio.run(self.process_query_multi_agent(query))
        except Exception as e:
            logger.error(f"Error processing query: {e}", exc_info=True)
            return f"Error: {str(e)}", "", ""
# Initialize assistant at import time so the Gradio handlers below can bind
# to it. NOTE(review): this runs document loading/indexing on module import.
logger.info("Initializing Unified Assistant...")
assistant = UnifiedAssistant()
logger.info("Assistant ready!")

# Example queries shown in the UI (mix of German/English, matching the
# languages the prompt supports).
EXAMPLE_QUERIES = [
    "Wie kann ich mich exmatrikulieren?",
    "What are the deadlines for leave of absence?",
    "Wie ändere ich meinen Namen in den Studiendokumenten?",
    "Welche Versicherungen brauche ich als Student?",
]
# Build the Gradio interface. `demo` is the Blocks object the launch guard
# (and Gradio's reload tooling) expects at module level.
with gr.Blocks(title="BFH Student Administration Assistant") as demo:
    gr.Markdown("# 🎓 BFH Student Administration Email Assistant")
    gr.Markdown("""
Ask questions about BFH student administration and receive professional email responses.
**Modes**:
- **Simple (Fast)**: Single LLM call (~5-10s) - Best for quick responses
- **Multi-Agent (Quality)**: Intent + Compose + Fact-Check (~60-75s) - Best for accuracy
""")

    with gr.Row():
        # Left column: mode selection, question entry, examples.
        with gr.Column(scale=2):
            mode_selector = gr.Radio(
                label="Processing Mode",
                choices=["Simple (Fast)", "Multi-Agent (Quality)"],
                value="Simple (Fast)",
                info="Simple mode is faster, Multi-Agent provides higher quality",
            )
            question_box = gr.Textbox(
                lines=3,
                label="Your Question",
                placeholder="e.g., Wie kann ich mich exmatrikulieren?",
            )
            generate_button = gr.Button("Generate Email Response", variant="primary")
            gr.Examples(
                label="Example Questions",
                examples=EXAMPLE_QUERIES,
                inputs=question_box,
            )
        # Right column: generated email, processing info, source documents.
        with gr.Column(scale=3):
            email_box = gr.Textbox(
                show_copy_button=True,
                lines=15,
                label="Generated Email",
            )
            info_panel = gr.Markdown(label="Processing Info")
            with gr.Accordion("Retrieved Source Documents", open=False):
                sources_panel = gr.HTML(label="Source Chunks")

    # Wire the button to the shared assistant instance.
    generate_button.click(
        fn=assistant.process_query,
        inputs=[question_box, mode_selector],
        outputs=[email_box, sources_panel, info_panel],
    )

if __name__ == "__main__":
    demo.launch()