# bfh-studadmin-assist — app_unified.py
# (commit 36f54bf: "Add @spaces.GPU decorator to fix HF Spaces runtime error")
"""Unified application with toggle between Simple and Multi-Agent modes."""
import gradio as gr
import asyncio
import logging
from pathlib import Path
from typing import Tuple
# Import spaces for HuggingFace GPU decorator
try:
import spaces
HF_SPACES = True
except ImportError:
HF_SPACES = False
# Simple mode imports
from openai import OpenAI
from src.config import get_config
from src.document_processing.loader import MarkdownDocumentLoader
from src.document_processing.chunker import SemanticChunker
from src.indexing.memory_indexer import MemoryDocumentIndexer
from src.retrieval.memory_retriever import MemoryRetriever
from src.ui.formatters import ChunkFormatter
# Multi-agent mode imports - completely lazy, only import when needed
# NOTE(review): this flag is never set to False anywhere in this file; actual
# availability is checked lazily in UnifiedAssistant._get_orchestrator.
MULTI_AGENT_AVAILABLE = True # Assume available, will check when needed

# Module-wide logging configuration and module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Dummy GPU function to satisfy HuggingFace Spaces GPU check
if HF_SPACES:
    @spaces.GPU
    def _hf_spaces_gpu_warmup():
        """Dummy function to satisfy HF Spaces GPU decorator requirement."""
        # Registering at least one @spaces.GPU-decorated function avoids the
        # HF Spaces GPU runtime error; nothing in this file calls it.
        return "GPU ready"
class UnifiedAssistant:
    """Unified assistant supporting both simple and multi-agent modes.

    Simple mode answers with one OpenAI chat-completion call over retrieved
    context. Multi-agent mode delegates to a lazily constructed orchestrator
    (intent detection -> compose -> fact-check).
    """

    @staticmethod
    def _format_documents_html(documents) -> str:
        """Format Haystack documents as simple HTML.

        Args:
            documents: Iterable of document objects exposing ``content``,
                ``meta`` and (optionally) ``score`` attributes.

        Returns:
            HTML string with one collapsible card per document, or a
            placeholder paragraph when nothing was retrieved.
        """
        import html  # stdlib; local import keeps the module import list unchanged

        if not documents:
            return "<p>No documents retrieved.</p>"
        html_parts = []
        for i, doc in enumerate(documents, 1):
            source = doc.meta.get("source_file", "Unknown") if hasattr(doc, 'meta') and doc.meta else "Unknown"
            # Haystack documents carry score=None by default, which the
            # ``:.3f`` format below cannot handle -- normalise to 0.0.
            score = getattr(doc, 'score', None)
            if score is None:
                score = 0.0
            content = doc.content if hasattr(doc, 'content') else str(doc)
            # Truncate long content
            preview = content[:200] + "..." if len(content) > 200 else content
            # Escape document text so stray markup inside chunks cannot break
            # or inject into the rendered HTML component.
            source = html.escape(str(source))
            preview = html.escape(preview)
            content = html.escape(content)
            html_parts.append(f"""
            <div style="border: 1px solid #ddd; border-radius: 8px; padding: 16px; margin-bottom: 16px; background: #f9f9f9; color: #1f2937;">
                <div style="margin-bottom: 12px; color: #1f2937;">
                    <strong>#{i}</strong> |
                    <span style="color: #2563eb;">📄 {source}</span> |
                    <span style="color: #666;">Score: {score:.3f}</span>
                </div>
                <details>
                    <summary style="cursor: pointer; padding: 8px; background: white; border-radius: 4px; margin-bottom: 8px; color: #1f2937;">
                        <strong>Preview:</strong> {preview}
                    </summary>
                    <div style="padding: 12px; background: white; border-radius: 4px; margin-top: 8px; white-space: pre-wrap; font-size: 0.9em; color: #1f2937;">
                        {content}
                    </div>
                </details>
            </div>
            """)
        return "".join(html_parts)

    def __init__(self):
        """Build config, OpenAI client, document index and retriever.

        Construction performs I/O: it loads (or creates) the embedded
        document index. The multi-agent orchestrator is deferred until first
        use.
        """
        self.config = get_config()
        self.client = OpenAI(api_key=self.config.llm.api_key)
        # Load documents (shared by both modes)
        self.indexer = MemoryDocumentIndexer(llm_config=self.config.llm)
        self._load_or_create_documents()
        # Initialize retriever (for simple mode)
        self.retriever = MemoryRetriever(
            document_store=self.indexer.document_store,
            llm_config=self.config.llm,
            retrieval_config=self.config.retrieval,
        )
        # Initialize orchestrator (for multi-agent mode) - lazy
        self.orchestrator = None

    def _load_or_create_documents(self):
        """Load pre-embedded documents from JSON, or index from scratch.

        Tries ``data/embedded_documents.json`` first; on any failure falls
        back to loading, chunking and indexing the markdown corpus.
        """
        import json
        from haystack import Document as HaystackDoc

        json_path = Path("data/embedded_documents.json")
        if json_path.exists():
            logger.info(f"Loading embedded documents from {json_path}...")
            try:
                # Explicit encoding: the corpus is UTF-8 (German umlauts);
                # without it Windows would use the locale codec and fail.
                with open(json_path, "r", encoding="utf-8") as f:
                    docs_data = json.load(f)
                documents = []
                for doc_data in docs_data:
                    doc = HaystackDoc(
                        id=doc_data.get("id"),
                        content=doc_data["content"],
                        embedding=doc_data.get("embedding"),
                        meta=doc_data.get("meta", {})
                    )
                    documents.append(doc)
                self.indexer.document_store.write_documents(documents)
                logger.info(f"Loaded {len(documents)} documents with embeddings")
                return
            except Exception as e:
                # Deliberate best-effort: log and fall through to fresh indexing.
                logger.warning(f"Failed to load documents: {e}")
        # Create documents if not found
        logger.info("Creating fresh document index...")
        loader = MarkdownDocumentLoader(self.config.document_processing.documents_path)
        documents = loader.load_documents()
        chunker = SemanticChunker(
            chunk_size=self.config.document_processing.chunk_size,
            chunk_overlap=self.config.document_processing.chunk_overlap,
            min_chunk_size=self.config.document_processing.min_chunk_size,
        )
        chunked_docs = chunker.chunk_documents(documents)
        self.indexer.index_documents(chunked_docs)

    def _get_orchestrator(self):
        """Lazy load orchestrator for multi-agent mode.

        Raises:
            RuntimeError: if the multi-agent dependencies cannot be imported.
        """
        if self.orchestrator is None:
            try:
                from src.pipeline.memory_orchestrator import MemoryRAGOrchestrator
                self.orchestrator = MemoryRAGOrchestrator(
                    config=self.config,
                    document_indexer=self.indexer  # Correct parameter name
                )
            except ImportError as e:
                # Chain the cause so the original ImportError stays visible.
                raise RuntimeError(f"Multi-agent mode is not available: {e}") from e
        return self.orchestrator

    def process_query_simple(self, query: str) -> Tuple[str, str, str]:
        """Process query with simple single-LLM mode (fast).

        Args:
            query: The student's question (any language).

        Returns:
            Tuple of (email text, retrieved-chunks HTML, metadata markdown).
        """
        logger.info(f"[SIMPLE MODE] Processing query: {query}")
        # Retrieve documents
        retrieved_docs = self.retriever.retrieve(query)
        logger.info(f"Retrieved {len(retrieved_docs)} documents")
        # Build context -- GPT-5 gets a tighter budget because reasoning
        # tokens compete with output tokens.
        max_docs = 2 if "gpt-5" in self.config.llm.model_name else 3
        max_chars_per_doc = 800 if "gpt-5" in self.config.llm.model_name else 1500
        context_parts = []
        for i, doc in enumerate(retrieved_docs[:max_docs], 1):
            source = doc.meta.get("source_file", "Unknown")
            content = doc.content[:max_chars_per_doc]
            context_parts.append(f"[Dokument {i}: {source}]\n{content}\n")
        context = "\n".join(context_parts) if context_parts else "Keine relevanten Dokumente gefunden."
        # Generate email with single LLM call
        system_prompt = """Du bist ein hilfreicher Assistent für die Studienadministration der BFH.
Deine Aufgabe ist es, professionelle E-Mail-Antworten auf Studentenanfragen zu verfassen.
Richtlinien:
- Antworte in der gleichen Sprache wie die Anfrage (Deutsch, Englisch oder Französisch)
- Verwende einen professionellen aber freundlichen Ton
- Sei klar, präzise und hilfreich
- Beziehe dich auf konkrete Formulare, Fristen oder Verfahren wenn relevant
- Gib klare nächste Schritte an
- Wenn Informationen fehlen, sage dies ehrlich
Für deutsche E-Mails:
- Verwende die formelle "Sie"-Form
- Grußformel: "Guten Tag" oder "Sehr geehrte/r..."
- Schlussformel: "Freundliche Grüsse" oder "Mit freundlichen Grüssen"
"""
        user_prompt = f"""Beantworte die folgende Anfrage basierend auf den verfügbaren Informationen:
Anfrage: {query}
Verfügbare Informationen:
{context}
Verfasse eine vollständige professionelle E-Mail-Antwort."""
        try:
            # GPT-5 models have different parameter requirements
            completion_params = {
                "model": self.config.llm.model_name,
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
            }
            # GPT-5 uses max_completion_tokens and supports reasoning_effort parameter
            if "gpt-5" in self.config.llm.model_name:
                completion_params["max_completion_tokens"] = self.config.llm.max_tokens
                # Don't set temperature for GPT-5 (only supports default 1.0)
                # Use minimal reasoning effort to get actual output instead of all reasoning tokens
                completion_params["reasoning_effort"] = "minimal"
            else:
                completion_params["max_tokens"] = self.config.llm.max_tokens
                completion_params["temperature"] = self.config.llm.temperature
            response = self.client.chat.completions.create(**completion_params)
            logger.info(f"[DEBUG] Response object: {response}")
            logger.info(f"[DEBUG] Response.choices: {response.choices}")
            email = response.choices[0].message.content
            if email is None or email.strip() == "":
                logger.error("LLM returned null or empty response!")
                logger.error(f"[DEBUG] Full response: {response.model_dump()}")
                email = "Error: The model returned an empty response. Please try again."
        except Exception as e:
            logger.error(f"Error generating email: {e}")
            email = f"Error generating response: {str(e)}"
        # Format chunks for display
        chunks_html = self._format_documents_html(retrieved_docs)
        # Create metadata
        metadata = f"""**Mode**: Simple (Single LLM call)
**Model**: {self.config.llm.model_name}
**Documents Retrieved**: {len(retrieved_docs)}
**Documents Used**: {min(len(retrieved_docs), max_docs)}
"""
        return email, chunks_html, metadata

    async def process_query_multi_agent(self, query: str) -> Tuple[str, str, str]:
        """Process query with multi-agent mode (high quality, async parallel).

        Args:
            query: The student's question (any language).

        Returns:
            Tuple of (email text, retrieved-chunks HTML, metadata markdown).

        Raises:
            RuntimeError: if multi-agent dependencies are unavailable.
        """
        logger.info(f"[MULTI-AGENT MODE] Processing query: {query}")
        orchestrator = self._get_orchestrator()
        result = await orchestrator.process_query(query)
        # Format email
        email = f"""Subject: {result.email_draft.subject}
{result.email_draft.body}"""
        # Format chunks - result.retrieved_docs are dicts, need to convert back
        from haystack import Document as HaystackDoc
        docs = []
        for doc_dict in result.retrieved_docs:
            if isinstance(doc_dict, dict):
                doc = HaystackDoc(
                    content=doc_dict.get('content', ''),
                    meta=doc_dict.get('meta', {}),
                    id=doc_dict.get('id')
                )
                if 'score' in doc_dict:
                    doc.score = doc_dict['score']
                docs.append(doc)
        chunks_html = self._format_documents_html(docs)
        # Create metadata
        mode_type = "Parallel ⚡" if self.config.use_parallel_processing else "Sequential 🐌"
        metadata = f"""**Mode**: Multi-Agent ({mode_type})
**Model**: {self.config.llm.model_name}
**Processing Time**: {result.processing_time:.1f}s
**Documents Retrieved**: {len(result.retrieved_docs)}
**Intent Detected**:
- Action: {result.intent.action_type}
- Topic: {result.intent.topic}
- Language: {result.intent.language}
**Fact Check**:
- Accuracy: {result.fact_check.accuracy_score:.0%}
- Status: {'✓ Accurate' if result.fact_check.is_accurate else '⚠ Issues Found'}
- Issues: {len(result.fact_check.issues_found)}
"""
        if result.fact_check.issues_found:
            metadata += "\n**Issues**:\n"
            for issue in result.fact_check.issues_found[:3]:  # Show first 3
                metadata += f"- {issue}\n"
        return email, chunks_html, metadata

    def process_query(self, query: str, mode: str) -> Tuple[str, str, str]:
        """Process query with selected mode.

        Gradio entry point: dispatches to simple or multi-agent processing
        and converts any failure into an error message instead of raising.
        """
        if not query or not query.strip():
            return "Please enter a query.", "", ""
        try:
            if mode == "Simple (Fast)":
                return self.process_query_simple(query)
            else:  # Multi-Agent
                # Run async function (will check for dependencies inside)
                return asyncio.run(self.process_query_multi_agent(query))
        except Exception as e:
            # Top-level UI boundary: log with traceback, show the message.
            logger.error(f"Error processing query: {e}", exc_info=True)
            return f"Error: {str(e)}", "", ""
# Initialize assistant
logger.info("Initializing Unified Assistant...")
# Module-level singleton: built once at import time so the Gradio callback can
# reuse the loaded index and OpenAI client. Construction performs I/O
# (document loading / indexing), so importing this module is slow.
assistant = UnifiedAssistant()
logger.info("Assistant ready!")

# Example queries
# Sample questions shown as clickable examples in the UI (mixed German and
# English, matching the bilingual student-administration domain).
EXAMPLE_QUERIES = [
    "Wie kann ich mich exmatrikulieren?",
    "What are the deadlines for leave of absence?",
    "Wie ändere ich meinen Namen in den Studiendokumenten?",
    "Welche Versicherungen brauche ich als Student?",
]
# Create Gradio interface
# Layout: left column holds mode selector + query input + examples; right
# column shows the generated email, processing metadata, and a collapsible
# accordion with the retrieved source chunks.
with gr.Blocks(title="BFH Student Administration Assistant") as demo:
    gr.Markdown("# 🎓 BFH Student Administration Email Assistant")
    gr.Markdown("""
Ask questions about BFH student administration and receive professional email responses.
**Modes**:
- **Simple (Fast)**: Single LLM call (~5-10s) - Best for quick responses
- **Multi-Agent (Quality)**: Intent + Compose + Fact-Check (~60-75s) - Best for accuracy
""")
    with gr.Row():
        with gr.Column(scale=2):
            # Radio value must match the string checked in
            # UnifiedAssistant.process_query ("Simple (Fast)").
            mode_radio = gr.Radio(
                choices=["Simple (Fast)", "Multi-Agent (Quality)"],
                value="Simple (Fast)",
                label="Processing Mode",
                info="Simple mode is faster, Multi-Agent provides higher quality"
            )
            query_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g., Wie kann ich mich exmatrikulieren?",
                lines=3
            )
            submit_btn = gr.Button("Generate Email Response", variant="primary")
            # Clicking an example fills query_input only; it does not submit.
            gr.Examples(
                examples=EXAMPLE_QUERIES,
                inputs=query_input,
                label="Example Questions"
            )
        with gr.Column(scale=3):
            email_output = gr.Textbox(
                label="Generated Email",
                lines=15,
                show_copy_button=True
            )
            metadata_output = gr.Markdown(label="Processing Info")
            # Collapsed by default; populated with the HTML built by
            # UnifiedAssistant._format_documents_html.
            with gr.Accordion("Retrieved Source Documents", open=False):
                chunks_output = gr.HTML(label="Source Chunks")
    # Wire the button to the synchronous dispatcher; outputs map 1:1 to the
    # (email, chunks_html, metadata) tuple it returns.
    submit_btn.click(
        fn=assistant.process_query,
        inputs=[query_input, mode_radio],
        outputs=[email_output, chunks_output, metadata_output]
    )

if __name__ == "__main__":
    demo.launch()