"""Gradio web interface for Project Intelligence Hub.""" from __future__ import annotations from pathlib import Path from typing import Any, List, Optional import gradio as gr from src.config.settings import Settings, Neo4jConfig, TogetherAIConfig from src.models.state import AppState from src.services.builder import GraphRAGBuilder from src.services.answerer import QueryAnswerer from src.services.neo4j_service import Neo4jService, Neo4jConnectionError class GradioApp: """Gradio controller for ingestion and query-time interactions.""" SAMPLE_REPORTS_DIR = Path(__file__).parent.parent.parent / "sample_project_reports" TITLE = "Project Intelligence Hub" DESCRIPTION = """ # Project Intelligence Hub Transform unstructured PDF reports into a queryable knowledge graph. 1. **Ingest** — Upload documents to extract entities and relationships 2. **Index** — Build vector embeddings and graph structure 3. **Query** — Retrieve answers via hybrid graph + semantic search """ GRAPH_EXPLORER_QUERIES = { "node_labels": """ CALL db.labels() YIELD label CALL { WITH label MATCH (n) WHERE label IN labels(n) RETURN count(n) AS cnt } RETURN label, cnt ORDER BY cnt DESC """, "relationship_types": """ CALL db.relationshipTypes() YIELD relationshipType CALL { WITH relationshipType MATCH ()-[r]->() WHERE type(r) = relationshipType RETURN count(r) AS cnt } RETURN relationshipType, cnt ORDER BY cnt DESC """, "sample_projects": """ MATCH (p:Project) OPTIONAL MATCH (p)-[:HAS_BUDGET]->(b:Budget) OPTIONAL MATCH (p)-[:LOCATED_IN]->(l:Location) RETURN p.name AS project, b.amount AS budget, b.currency AS currency, l.city AS city, l.country AS country LIMIT 10 """, } def __init__(self, settings: Settings | None = None) -> None: self.settings = settings or Settings.from_env() self.answerer = QueryAnswerer() self._validate_settings() def _validate_settings(self) -> None: issues = [] if not self.settings.together_ai.api_key: issues.append("TOGETHER_API_KEY not set in .env") if not self.settings.neo4j.uri: issues.append("NEO4J_URI not set in .env") if not self.settings.neo4j.password: issues.append("NEO4J_PASSWORD not set in .env") if issues: print("Configuration warnings:") for issue in issues: print(f" - {issue}") def _get_sample_files(self) -> List[str]: """Return list of available sample PDF files.""" if not self.SAMPLE_REPORTS_DIR.exists(): return [] return sorted([ f.name for f in self.SAMPLE_REPORTS_DIR.glob("*.pdf") ]) def _resolve_sample_files(self, selected_names: Optional[List[str]]) -> List[str]: """Convert selected sample file names to full paths.""" if not selected_names: return [] return [ str(self.SAMPLE_REPORTS_DIR / name) for name in selected_names if (self.SAMPLE_REPORTS_DIR / name).exists() ] def _ingest_action(self, pdf_files: List[Any], sample_files: Optional[List[str]], clear_db: str): clear_db_bool = clear_db == "Yes" # Combine uploaded files with selected sample files all_files = list(pdf_files) if pdf_files else [] sample_paths = self._resolve_sample_files(sample_files) all_files.extend(sample_paths) if not all_files: yield "No documents provided. Upload or select at least one PDF.", gr.update(value=0, visible=True), None return if not self.settings.together_ai.api_key: yield "Missing API credentials: TOGETHER_API_KEY", gr.update(value=0, visible=True), None return if not self.settings.neo4j.uri or not self.settings.neo4j.password: yield "Missing database credentials: NEO4J_URI or NEO4J_PASSWORD", gr.update(value=0, visible=True), None return together_config = TogetherAIConfig( api_key=self.settings.together_ai.api_key, chat_model=self.settings.together_ai.chat_model, embedding_model=self.settings.together_ai.embedding_model, ) neo4j_config = Neo4jConfig( uri=self.settings.neo4j.uri, username=self.settings.neo4j.username, password=self.settings.neo4j.password, database=self.settings.neo4j.database, ) try: builder = GraphRAGBuilder(together_config=together_config) final_state = None for status, progress, state in builder.ingest_with_progress( pdf_files=all_files, neo4j_config=neo4j_config, clear_db=clear_db_bool, skip_llm_extraction=True, ): yield status, gr.update(value=progress, visible=True), state if state is not None: final_state = state if final_state: yield "Pipeline complete. Ready for queries.", gr.update(value=1.0, visible=False), final_state except ValueError as e: yield f"Configuration error: {e}", gr.update(value=0, visible=True), None except Exception as e: import traceback traceback.print_exc() yield f"Pipeline failed: {e}", gr.update(value=0, visible=True), None def _clear_action(self) -> str: if not self.settings.neo4j.uri or not self.settings.neo4j.password: return "Database credentials not configured." try: with Neo4jService( uri=self.settings.neo4j.uri, user=self.settings.neo4j.username, password=self.settings.neo4j.password, database=self.settings.neo4j.database, ) as neo4j: neo4j.clear() return "Graph database cleared. All nodes and relationships removed." except Neo4jConnectionError as e: return f"Connection error: {e}" except Exception as e: return f"Operation failed: {e}" def _ask_action(self, question: str, state: AppState) -> str: return self.answerer.answer(question, state) def _explore_graph_action(self) -> str: if not self.settings.neo4j.uri or not self.settings.neo4j.password: return "Database credentials not configured." try: with Neo4jService( uri=self.settings.neo4j.uri, user=self.settings.neo4j.username, password=self.settings.neo4j.password, database=self.settings.neo4j.database, ) as neo4j: output = [] # Node counts by label output.append("### Node Distribution\n") output.append("| Label | Count |") output.append("|-------|-------|") try: results = neo4j.query(self.GRAPH_EXPLORER_QUERIES["node_labels"]) for row in results: output.append(f"| {row['label']} | {row['cnt']:,} |") except Exception: output.append("| (unable to fetch) | - |") # Relationship counts output.append("\n### Relationship Distribution\n") output.append("| Type | Count |") output.append("|------|-------|") try: results = neo4j.query(self.GRAPH_EXPLORER_QUERIES["relationship_types"]) for row in results: output.append(f"| {row['relationshipType']} | {row['cnt']:,} |") except Exception: output.append("| (unable to fetch) | - |") # Sample projects output.append("\n### Sample Projects\n") output.append("| Project | Budget | Location |") output.append("|---------|--------|----------|") try: results = neo4j.query(self.GRAPH_EXPLORER_QUERIES["sample_projects"]) if not results: output.append("| (no projects found) | - | - |") for row in results: name = row.get('project') or '-' budget = f"{row.get('budget') or '-'} {row.get('currency') or ''}".strip() location = f"{row.get('city') or ''}, {row.get('country') or ''}".strip(", ") output.append(f"| {name} | {budget} | {location or '-'} |") except Exception: output.append("| (unable to fetch) | - | - |") return "\n".join(output) except Neo4jConnectionError as e: return f"Connection error: {e}" except Exception as e: return f"Failed to fetch graph data: {e}" def build(self) -> gr.Blocks: with gr.Blocks(title=self.TITLE) as demo: gr.Markdown(self.DESCRIPTION) state = gr.State(value=None) with gr.Group(): pdfs = gr.File( label="Upload Documents", file_types=[".pdf"], file_count="multiple", ) sample_file_choices = self._get_sample_files() if sample_file_choices: sample_files = gr.CheckboxGroup( label="Or Select from Sample Reports", choices=sample_file_choices, value=[], ) else: sample_files = gr.CheckboxGroup( label="Sample Reports", choices=[], value=[], visible=False, ) with gr.Row(): clear_toggle = gr.Radio( label="Reset graph before ingestion", choices=["Yes", "No"], value="Yes", scale=1, ) with gr.Row(): ingest_btn = gr.Button("Run Ingestion Pipeline", variant="primary", scale=2) clear_btn = gr.Button("Reset Graph", variant="secondary", scale=1) progress_bar = gr.Slider( label="Progress", minimum=0, maximum=1, value=0, interactive=False, visible=False, ) ingest_status = gr.Markdown() gr.Markdown("---") with gr.Group(): gr.Markdown("### Query Interface") question = gr.Textbox( label="Natural Language Query", placeholder="e.g., Compare budget allocations and milestone timelines across projects", lines=2, ) ask_btn = gr.Button("Execute Query", variant="primary") answer = gr.Markdown(label="Response") with gr.Accordion("Graph Explorer", open=False): gr.Markdown("View database contents without direct access to credentials.") explore_btn = gr.Button("Load Graph Statistics", variant="secondary") graph_stats = gr.Markdown() with gr.Accordion("System Configuration", open=False): gr.Markdown(self._get_config_status()) ingest_btn.click( fn=self._ingest_action, inputs=[pdfs, sample_files, clear_toggle], outputs=[ingest_status, progress_bar, state], ) clear_btn.click( fn=self._clear_action, inputs=[], outputs=[ingest_status], ) ask_btn.click( fn=self._ask_action, inputs=[question, state], outputs=[answer], ) explore_btn.click( fn=self._explore_graph_action, inputs=[], outputs=[graph_stats], ) return demo def _get_config_status(self) -> str: def status(value: str) -> str: return "Connected" if value else "Not configured" return f""" | Component | Status | |-----------|--------| | LLM Provider (Together AI) | {status(self.settings.together_ai.api_key)} | | Graph Database (Neo4j) | {status(self.settings.neo4j.uri)} | """ def launch(self, **kwargs) -> None: demo = self.build() demo.launch( server_name=kwargs.get("server_name", self.settings.app.host), server_port=kwargs.get("server_port", self.settings.app.port), theme=gr.themes.Soft(), **{k: v for k, v in kwargs.items() if k not in ("server_name", "server_port")}, )