# Hosting-page residue: uploader adi-123, commit "Upload 28 files" (a48023f, verified).
"""Gradio web interface for Project Intelligence Hub."""
from __future__ import annotations
from pathlib import Path
from typing import Any, List, Optional
import gradio as gr
from src.config.settings import Settings, Neo4jConfig, TogetherAIConfig
from src.models.state import AppState
from src.services.builder import GraphRAGBuilder
from src.services.answerer import QueryAnswerer
from src.services.neo4j_service import Neo4jService, Neo4jConnectionError
class GradioApp:
    """Gradio controller for ingestion and query-time interactions.

    The class wires a Gradio ``Blocks`` UI to three backend collaborators:
    ``GraphRAGBuilder`` (document ingestion), ``QueryAnswerer`` (question
    answering) and ``Neo4jService`` (graph reset / statistics).

    Attributes:
        settings: Application settings (explicit, or loaded via
            ``Settings.from_env()``).
        answerer: The ``QueryAnswerer`` used by the query panel.
    """

    # Directory scanned for bundled sample PDFs (three levels above this file).
    SAMPLE_REPORTS_DIR = Path(__file__).parent.parent.parent / "sample_project_reports"
    TITLE = "Project Intelligence Hub"
    # Markdown shown at the top of the page; kept flush-left so Markdown does
    # not interpret the lines as an indented code block.
    DESCRIPTION = """
# Project Intelligence Hub
Transform unstructured PDF reports into a queryable knowledge graph.
1. **Ingest** — Upload documents to extract entities and relationships
2. **Index** — Build vector embeddings and graph structure
3. **Query** — Retrieve answers via hybrid graph + semantic search
"""
    # Read-only Cypher queries used by the "Graph Explorer" panel.
    GRAPH_EXPLORER_QUERIES = {
        "node_labels": """
CALL db.labels() YIELD label
CALL { WITH label MATCH (n) WHERE label IN labels(n) RETURN count(n) AS cnt }
RETURN label, cnt ORDER BY cnt DESC
""",
        "relationship_types": """
CALL db.relationshipTypes() YIELD relationshipType
CALL { WITH relationshipType MATCH ()-[r]->() WHERE type(r) = relationshipType RETURN count(r) AS cnt }
RETURN relationshipType, cnt ORDER BY cnt DESC
""",
        "sample_projects": """
MATCH (p:Project)
OPTIONAL MATCH (p)-[:HAS_BUDGET]->(b:Budget)
OPTIONAL MATCH (p)-[:LOCATED_IN]->(l:Location)
RETURN p.name AS project, b.amount AS budget, b.currency AS currency,
l.city AS city, l.country AS country
LIMIT 10
""",
    }

    def __init__(self, settings: Settings | None = None) -> None:
        """Create the controller.

        Args:
            settings: Pre-built settings; when omitted (or falsy) they are
                loaded with ``Settings.from_env()``.
        """
        self.settings = settings or Settings.from_env()
        self.answerer = QueryAnswerer()
        self._validate_settings()

    # ------------------------------------------------------------------
    # Configuration helpers
    # ------------------------------------------------------------------
    def _config_issues(self) -> List[str]:
        """Return one warning message per required setting that is empty."""
        required = (
            (self.settings.together_ai.api_key, "TOGETHER_API_KEY not set in .env"),
            (self.settings.neo4j.uri, "NEO4J_URI not set in .env"),
            (self.settings.neo4j.password, "NEO4J_PASSWORD not set in .env"),
        )
        return [message for value, message in required if not value]

    def _validate_settings(self) -> None:
        """Print a warning list for missing settings; never raises."""
        issues = self._config_issues()
        if issues:
            print("Configuration warnings:")
            for issue in issues:
                print(f" - {issue}")

    def _neo4j_configured(self) -> bool:
        """Return True when both the Neo4j URI and password are set."""
        neo4j = self.settings.neo4j
        return bool(neo4j.uri and neo4j.password)

    def _open_neo4j(self) -> Neo4jService:
        """Build a ``Neo4jService`` from the current settings.

        The result is used as a context manager by the callers.
        """
        neo4j = self.settings.neo4j
        return Neo4jService(
            uri=neo4j.uri,
            user=neo4j.username,
            password=neo4j.password,
            database=neo4j.database,
        )

    def _get_config_status(self) -> str:
        """Return a Markdown table summarising which backends are configured.

        Only the presence of settings is checked; no connection is attempted,
        so the table reports "Configured" rather than claiming connectivity.
        Neo4j counts as configured only when URI and password are both set,
        matching the guard used by the database actions.
        """

        def status(configured: bool) -> str:
            return "Configured" if configured else "Not configured"

        return f"""
| Component | Status |
|-----------|--------|
| LLM Provider (Together AI) | {status(bool(self.settings.together_ai.api_key))} |
| Graph Database (Neo4j) | {status(self._neo4j_configured())} |
"""

    # ------------------------------------------------------------------
    # Sample files
    # ------------------------------------------------------------------
    def _get_sample_files(self) -> List[str]:
        """Return the sorted file names of the bundled sample PDFs."""
        if not self.SAMPLE_REPORTS_DIR.exists():
            return []
        return sorted(pdf.name for pdf in self.SAMPLE_REPORTS_DIR.glob("*.pdf"))

    def _resolve_sample_files(self, selected_names: Optional[List[str]]) -> List[str]:
        """Convert selected sample file names to full paths.

        Names arrive from the browser, so only bare file names that point at
        a regular file directly inside ``SAMPLE_REPORTS_DIR`` are accepted.
        Anything containing a directory component (``../x.pdf``, absolute
        paths) or naming a directory is silently dropped, as are names that
        do not exist.

        Args:
            selected_names: Names ticked in the sample checkbox group, or
                ``None`` / empty when nothing is selected.

        Returns:
            Full paths (as strings) in the order the names were given.
        """
        if not selected_names:
            return []
        resolved: List[str] = []
        for name in selected_names:
            # A name that differs from its own final path component carries a
            # directory part and could escape the sample directory.
            if Path(name).name != name:
                continue
            candidate = self.SAMPLE_REPORTS_DIR / name
            if candidate.is_file():
                resolved.append(str(candidate))
        return resolved

    # ------------------------------------------------------------------
    # Ingestion
    # ------------------------------------------------------------------
    @staticmethod
    def _ingest_failure(message: str):
        """Build the (status, progress update, state) tuple for a failed run."""
        return message, gr.update(value=0, visible=True), None

    def _ingest_action(self, pdf_files: List[Any], sample_files: Optional[List[str]], clear_db: str):
        """Run the ingestion pipeline, streaming progress to the UI.

        This is a generator: every yielded item is a 3-tuple of
        ``(status text, progress-bar update, state)`` matching the three
        Gradio outputs bound in :meth:`build`.

        Args:
            pdf_files: Files uploaded through the file widget (may be empty
                or ``None``).
            sample_files: Names of ticked sample reports.
            clear_db: ``"Yes"`` to reset the graph before ingesting; any
                other value keeps existing data.
        """
        clear_db_bool = clear_db == "Yes"

        # Combine uploaded files with the selected sample files.
        all_files = list(pdf_files) if pdf_files else []
        all_files.extend(self._resolve_sample_files(sample_files))

        if not all_files:
            yield self._ingest_failure("No documents provided. Upload or select at least one PDF.")
            return
        if not self.settings.together_ai.api_key:
            yield self._ingest_failure("Missing API credentials: TOGETHER_API_KEY")
            return
        if not self._neo4j_configured():
            yield self._ingest_failure("Missing database credentials: NEO4J_URI or NEO4J_PASSWORD")
            return

        together_config = TogetherAIConfig(
            api_key=self.settings.together_ai.api_key,
            chat_model=self.settings.together_ai.chat_model,
            embedding_model=self.settings.together_ai.embedding_model,
        )
        neo4j_config = Neo4jConfig(
            uri=self.settings.neo4j.uri,
            username=self.settings.neo4j.username,
            password=self.settings.neo4j.password,
            database=self.settings.neo4j.database,
        )

        try:
            builder = GraphRAGBuilder(together_config=together_config)
            final_state = None
            for status, progress, state in builder.ingest_with_progress(
                pdf_files=all_files,
                neo4j_config=neo4j_config,
                clear_db=clear_db_bool,
                # NOTE(review): LLM extraction is always skipped from the UI;
                # confirm this hard-coded flag is intentional.
                skip_llm_extraction=True,
            ):
                yield status, gr.update(value=progress, visible=True), state
                # Remember the most recent non-empty state for the final message.
                if state is not None:
                    final_state = state
            if final_state:
                # Hide the progress bar once the pipeline has finished.
                yield "Pipeline complete. Ready for queries.", gr.update(value=1.0, visible=False), final_state
        except ValueError as e:
            yield self._ingest_failure(f"Configuration error: {e}")
        except Exception as e:
            # UI boundary: print the traceback for the operator and show a
            # short message to the user instead of crashing the event handler.
            import traceback

            traceback.print_exc()
            yield self._ingest_failure(f"Pipeline failed: {e}")

    # ------------------------------------------------------------------
    # Database actions
    # ------------------------------------------------------------------
    def _clear_action(self) -> str:
        """Clear the graph database and return a status message for the UI."""
        if not self._neo4j_configured():
            return "Database credentials not configured."
        try:
            with self._open_neo4j() as neo4j:
                neo4j.clear()
            return "Graph database cleared. All nodes and relationships removed."
        except Neo4jConnectionError as e:
            return f"Connection error: {e}"
        except Exception as e:
            return f"Operation failed: {e}"

    def _ask_action(self, question: str, state: AppState) -> str:
        """Delegate a question and the current state to the answerer."""
        return self.answerer.answer(question, state)

    @staticmethod
    def _md_cell(value: Any) -> str:
        """Render *value* so it cannot break a Markdown table row.

        Line breaks are flattened to spaces and ``|`` is escaped.
        """
        return " ".join(str(value).splitlines()).replace("|", "\\|")

    def _label_row(self, row: Any) -> str:
        """Format one row of the node-label count table."""
        return f"| {self._md_cell(row['label'])} | {row['cnt']:,} |"

    def _relationship_row(self, row: Any) -> str:
        """Format one row of the relationship-type count table."""
        return f"| {self._md_cell(row['relationshipType'])} | {row['cnt']:,} |"

    def _project_row(self, row: Any) -> str:
        """Format one row of the sample-projects table.

        Missing values render as ``-``; a budget of ``0`` is a real value and
        is shown as such.
        """
        name = row.get("project") or "-"
        amount = row.get("budget")
        amount_text = "-" if amount in (None, "") else amount
        budget = f"{amount_text} {row.get('currency') or ''}".strip()
        location = f"{row.get('city') or ''}, {row.get('country') or ''}".strip(", ")
        return f"| {self._md_cell(name)} | {self._md_cell(budget)} | {self._md_cell(location or '-')} |"

    def _table_section(
        self,
        neo4j: Any,
        query_key: str,
        heading: str,
        header: str,
        divider: str,
        format_row: Any,
        failure_row: str,
        empty_row: Optional[str] = None,
    ) -> List[str]:
        """Run one explorer query and return its Markdown table lines.

        A failing query does not abort the whole report: the rows produced so
        far are kept and *failure_row* is appended instead.
        """
        lines = [heading, header, divider]
        try:
            results = neo4j.query(self.GRAPH_EXPLORER_QUERIES[query_key])
            if not results and empty_row is not None:
                lines.append(empty_row)
            for row in results:
                lines.append(format_row(row))
        except Exception:
            # Deliberate best effort: each section degrades independently.
            lines.append(failure_row)
        return lines

    def _explore_graph_action(self) -> str:
        """Return a Markdown report with node, relationship and project tables."""
        if not self._neo4j_configured():
            return "Database credentials not configured."
        try:
            with self._open_neo4j() as neo4j:
                output: List[str] = []
                # Node counts by label
                output += self._table_section(
                    neo4j,
                    "node_labels",
                    "### Node Distribution\n",
                    "| Label | Count |",
                    "|-------|-------|",
                    self._label_row,
                    "| (unable to fetch) | - |",
                )
                # Relationship counts
                output += self._table_section(
                    neo4j,
                    "relationship_types",
                    "\n### Relationship Distribution\n",
                    "| Type | Count |",
                    "|------|-------|",
                    self._relationship_row,
                    "| (unable to fetch) | - |",
                )
                # Sample projects
                output += self._table_section(
                    neo4j,
                    "sample_projects",
                    "\n### Sample Projects\n",
                    "| Project | Budget | Location |",
                    "|---------|--------|----------|",
                    self._project_row,
                    "| (unable to fetch) | - | - |",
                    empty_row="| (no projects found) | - | - |",
                )
                return "\n".join(output)
        except Neo4jConnectionError as e:
            return f"Connection error: {e}"
        except Exception as e:
            return f"Failed to fetch graph data: {e}"

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def build(self) -> gr.Blocks:
        """Construct the Gradio ``Blocks`` layout and bind the event handlers.

        NOTE(review): the container nesting below was reconstructed from a
        copy of the file that had lost its indentation; confirm that the
        progress bar and status text belong inside the first group.
        """
        with gr.Blocks(title=self.TITLE) as demo:
            gr.Markdown(self.DESCRIPTION)
            state = gr.State(value=None)

            with gr.Group():
                pdfs = gr.File(
                    label="Upload Documents",
                    file_types=[".pdf"],
                    file_count="multiple",
                )
                # The sample picker is always created so the ingest handler
                # has a fixed input list; it is hidden when no samples exist.
                sample_file_choices = self._get_sample_files()
                has_samples = bool(sample_file_choices)
                sample_files = gr.CheckboxGroup(
                    label="Or Select from Sample Reports" if has_samples else "Sample Reports",
                    choices=sample_file_choices,
                    value=[],
                    visible=has_samples,
                )
                with gr.Row():
                    clear_toggle = gr.Radio(
                        label="Reset graph before ingestion",
                        choices=["Yes", "No"],
                        value="Yes",
                        scale=1,
                    )
                with gr.Row():
                    ingest_btn = gr.Button("Run Ingestion Pipeline", variant="primary", scale=2)
                    clear_btn = gr.Button("Reset Graph", variant="secondary", scale=1)
                # A read-only slider doubles as the progress bar; it is made
                # visible by the ingest handler.
                progress_bar = gr.Slider(
                    label="Progress",
                    minimum=0,
                    maximum=1,
                    value=0,
                    interactive=False,
                    visible=False,
                )
                ingest_status = gr.Markdown()

            gr.Markdown("---")

            with gr.Group():
                gr.Markdown("### Query Interface")
                question = gr.Textbox(
                    label="Natural Language Query",
                    placeholder="e.g., Compare budget allocations and milestone timelines across projects",
                    lines=2,
                )
                ask_btn = gr.Button("Execute Query", variant="primary")
                answer = gr.Markdown(label="Response")

            with gr.Accordion("Graph Explorer", open=False):
                gr.Markdown("View database contents without direct access to credentials.")
                explore_btn = gr.Button("Load Graph Statistics", variant="secondary")
                graph_stats = gr.Markdown()

            with gr.Accordion("System Configuration", open=False):
                gr.Markdown(self._get_config_status())

            ingest_btn.click(
                fn=self._ingest_action,
                inputs=[pdfs, sample_files, clear_toggle],
                outputs=[ingest_status, progress_bar, state],
            )
            clear_btn.click(
                fn=self._clear_action,
                inputs=[],
                outputs=[ingest_status],
            )
            ask_btn.click(
                fn=self._ask_action,
                inputs=[question, state],
                outputs=[answer],
            )
            explore_btn.click(
                fn=self._explore_graph_action,
                inputs=[],
                outputs=[graph_stats],
            )
        return demo

    def launch(self, **kwargs) -> None:
        """Build the UI and start the Gradio server.

        Args:
            **kwargs: Forwarded to ``Blocks.launch``. ``server_name`` and
                ``server_port`` default to the app settings and ``theme``
                defaults to ``gr.themes.Soft()``; caller-supplied values take
                precedence over all three defaults.
        """
        options = {
            "server_name": self.settings.app.host,
            "server_port": self.settings.app.port,
        }
        options.update(kwargs)
        if "theme" not in options:
            # NOTE(review): passing ``theme`` to launch() relies on the
            # installed Gradio version accepting it there; confirm against
            # the pinned release.
            options["theme"] = gr.themes.Soft()
        self.build().launch(**options)