"""Gradio web interface for Project Intelligence Hub."""
from __future__ import annotations
from pathlib import Path
from typing import Any, List, Optional
import gradio as gr
from src.config.settings import Settings, Neo4jConfig, TogetherAIConfig
from src.models.state import AppState
from src.services.builder import GraphRAGBuilder
from src.services.answerer import QueryAnswerer
from src.services.neo4j_service import Neo4jService, Neo4jConnectionError
class GradioApp:
    """Gradio controller for ingestion and query-time interactions.

    The class wires four UI actions to backend services:

    * ingestion of PDF files through ``GraphRAGBuilder``,
    * resetting the graph through ``Neo4jService.clear``,
    * answering questions through ``QueryAnswerer.answer``,
    * read-only graph statistics rendered as Markdown tables.

    Attributes:
        SAMPLE_REPORTS_DIR: Directory scanned for bundled sample PDFs.
        TITLE: Browser/page title passed to ``gr.Blocks``.
        DESCRIPTION: Markdown shown at the top of the page.
        GRAPH_EXPLORER_QUERIES: Cypher statements used by the graph explorer,
            keyed by section name.
    """

    SAMPLE_REPORTS_DIR = Path(__file__).parent.parent.parent / "sample_project_reports"
    TITLE = "Project Intelligence Hub"
    # Built from explicit lines so the Markdown does not depend on how the
    # literal happens to be indented inside the class body.
    DESCRIPTION = (
        "\n"
        "# Project Intelligence Hub\n"
        "\n"
        "Transform unstructured PDF reports into a queryable knowledge graph.\n"
        "\n"
        "1. **Ingest** — Upload documents to extract entities and relationships\n"
        "2. **Index** — Build vector embeddings and graph structure\n"
        "3. **Query** — Retrieve answers via hybrid graph + semantic search\n"
    )
    GRAPH_EXPLORER_QUERIES = {
        "node_labels": """
            CALL db.labels() YIELD label
            CALL { WITH label MATCH (n) WHERE label IN labels(n) RETURN count(n) AS cnt }
            RETURN label, cnt ORDER BY cnt DESC
        """,
        "relationship_types": """
            CALL db.relationshipTypes() YIELD relationshipType
            CALL { WITH relationshipType MATCH ()-[r]->() WHERE type(r) = relationshipType RETURN count(r) AS cnt }
            RETURN relationshipType, cnt ORDER BY cnt DESC
        """,
        "sample_projects": """
            MATCH (p:Project)
            OPTIONAL MATCH (p)-[:HAS_BUDGET]->(b:Budget)
            OPTIONAL MATCH (p)-[:LOCATED_IN]->(l:Location)
            RETURN p.name AS project, b.amount AS budget, b.currency AS currency,
            l.city AS city, l.country AS country
            LIMIT 10
        """,
    }

    def __init__(self, settings: Settings | None = None) -> None:
        """Create the controller.

        Args:
            settings: Application settings. When omitted they are loaded with
                ``Settings.from_env()``.
        """
        # Explicit None check: only a missing argument triggers the env load.
        self.settings = settings if settings is not None else Settings.from_env()
        self.answerer = QueryAnswerer()
        self._validate_settings()

    def _validate_settings(self) -> None:
        """Print a warning for every required credential that is empty."""
        required = (
            (self.settings.together_ai.api_key, "TOGETHER_API_KEY"),
            (self.settings.neo4j.uri, "NEO4J_URI"),
            (self.settings.neo4j.password, "NEO4J_PASSWORD"),
        )
        missing = [env_name for value, env_name in required if not value]
        if missing:
            print("Configuration warnings:")
            for env_name in missing:
                print(f" - {env_name} not set in .env")

    # ------------------------------------------------------------------
    # Sample files
    # ------------------------------------------------------------------
    def _get_sample_files(self) -> List[str]:
        """Return the sorted file names of the PDFs in ``SAMPLE_REPORTS_DIR``.

        Returns an empty list when the directory does not exist. Only regular
        files are listed; a directory whose name ends in ``.pdf`` is skipped.
        """
        if not self.SAMPLE_REPORTS_DIR.exists():
            return []
        return sorted(
            entry.name
            for entry in self.SAMPLE_REPORTS_DIR.glob("*.pdf")
            if entry.is_file()
        )

    def _resolve_sample_files(self, selected_names: Optional[List[str]]) -> List[str]:
        """Convert selected sample file names to full paths.

        Args:
            selected_names: Names chosen in the UI; ``None`` or empty means
                nothing was selected.

        Returns:
            Paths (as strings) of the selected names that are regular files
            directly inside ``SAMPLE_REPORTS_DIR``. Names that contain path
            components (``../x``, absolute paths) are ignored, so a crafted
            request cannot address files outside the sample directory.
        """
        if not selected_names:
            return []
        resolved = []
        for name in selected_names:
            # A bare file name is equal to its own final path component.
            if Path(name).name != name:
                continue
            candidate = self.SAMPLE_REPORTS_DIR / name
            if candidate.is_file():
                resolved.append(str(candidate))
        return resolved

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _has_neo4j_credentials(self) -> bool:
        """Return True when both the Neo4j URI and password are set."""
        neo4j = self.settings.neo4j
        return bool(neo4j.uri and neo4j.password)

    def _open_neo4j(self) -> Neo4jService:
        """Build a ``Neo4jService`` from the configured connection settings."""
        neo4j = self.settings.neo4j
        return Neo4jService(
            uri=neo4j.uri,
            user=neo4j.username,
            password=neo4j.password,
            database=neo4j.database,
        )

    # ------------------------------------------------------------------
    # Ingestion
    # ------------------------------------------------------------------
    def _ingest_precondition_error(self, all_files: List[Any]) -> Optional[str]:
        """Return the message for the first unmet ingestion precondition.

        Returns ``None`` when documents and all credentials are present.
        """
        if not all_files:
            return "No documents provided. Upload or select at least one PDF."
        if not self.settings.together_ai.api_key:
            return "Missing API credentials: TOGETHER_API_KEY"
        if not self._has_neo4j_credentials():
            return "Missing database credentials: NEO4J_URI or NEO4J_PASSWORD"
        return None

    @staticmethod
    def _ingest_failure(message: str):
        """Build the (status, progress update, state) triple for a failure."""
        return message, gr.update(value=0, visible=True), None

    def _ingest_action(self, pdf_files: List[Any], sample_files: Optional[List[str]], clear_db: str):
        """Run the ingestion pipeline, streaming progress to the UI.

        Args:
            pdf_files: Uploaded files from the file component (may be empty
                or ``None``).
            sample_files: Names selected in the sample-report checkbox group.
            clear_db: ``"Yes"`` to clear the graph before ingesting; any
                other value leaves existing data in place.

        Yields:
            ``(status text, progress-bar update, state)`` triples matching
            the outputs bound in ``build``.
        """
        clear_db_bool = clear_db == "Yes"

        # Combine uploaded files with selected sample files.
        all_files = list(pdf_files) if pdf_files else []
        all_files.extend(self._resolve_sample_files(sample_files))

        problem = self._ingest_precondition_error(all_files)
        if problem is not None:
            yield self._ingest_failure(problem)
            return

        together_config = TogetherAIConfig(
            api_key=self.settings.together_ai.api_key,
            chat_model=self.settings.together_ai.chat_model,
            embedding_model=self.settings.together_ai.embedding_model,
        )
        neo4j_config = Neo4jConfig(
            uri=self.settings.neo4j.uri,
            username=self.settings.neo4j.username,
            password=self.settings.neo4j.password,
            database=self.settings.neo4j.database,
        )

        try:
            builder = GraphRAGBuilder(together_config=together_config)
            final_state = None
            for status, progress, state in builder.ingest_with_progress(
                pdf_files=all_files,
                neo4j_config=neo4j_config,
                clear_db=clear_db_bool,
                skip_llm_extraction=True,
            ):
                yield status, gr.update(value=progress, visible=True), state
                # Remember the last real state; intermediate updates may
                # carry None.
                if state is not None:
                    final_state = state
            if final_state is not None:
                yield "Pipeline complete. Ready for queries.", gr.update(value=1.0, visible=False), final_state
        except ValueError as e:
            yield self._ingest_failure(f"Configuration error: {e}")
        except Exception as e:
            # UI boundary: report the failure to the user and keep the full
            # traceback on the server console.
            import traceback

            traceback.print_exc()
            yield self._ingest_failure(f"Pipeline failed: {e}")

    def _clear_action(self) -> str:
        """Clear the graph database and return a status message for the UI."""
        if not self._has_neo4j_credentials():
            return "Database credentials not configured."
        try:
            with self._open_neo4j() as neo4j:
                neo4j.clear()
        except Neo4jConnectionError as e:
            return f"Connection error: {e}"
        except Exception as e:
            return f"Operation failed: {e}"
        return "Graph database cleared. All nodes and relationships removed."

    def _ask_action(self, question: str, state: AppState) -> str:
        """Answer ``question`` against ``state`` via the ``QueryAnswerer``.

        ``state`` is whatever the UI state component currently holds; it
        starts as ``None`` until an ingestion run has produced a state.
        """
        return self.answerer.answer(question, state)

    # ------------------------------------------------------------------
    # Graph explorer
    # ------------------------------------------------------------------
    @staticmethod
    def _md_cell(value: Any) -> str:
        """Render ``value`` as text that is safe inside a Markdown table cell.

        Line breaks are collapsed to spaces and pipes are escaped, because
        either would otherwise split the cell or the row.
        """
        flattened = " ".join(str(value).splitlines())
        return flattened.replace("|", "\\|")

    @staticmethod
    def _table_header(headers: Any) -> List[str]:
        """Return the Markdown header and separator rows for ``headers``."""
        title_row = "| " + " | ".join(headers) + " |"
        separator = "|" + "|".join("-" * (len(h) + 2) for h in headers) + "|"
        return [title_row, separator]

    @classmethod
    def _format_count_row(cls, row: Any, name_key: str) -> str:
        """Format a ``{name_key: ..., 'cnt': int}`` row as ``| name | count |``."""
        return f"| {cls._md_cell(row[name_key])} | {row['cnt']:,} |"

    @classmethod
    def _format_project_row(cls, row: Any) -> str:
        """Format one sample-project row as ``| project | budget | location |``.

        Missing values are shown as ``-``. A budget of ``0`` is a real value
        and is displayed, not treated as missing.
        """
        name = row.get("project") or "-"
        amount = row.get("budget")
        shown_amount = "-" if amount is None or amount == "" else amount
        budget = f"{shown_amount} {row.get('currency') or ''}".strip()
        places = [str(part) for part in (row.get("city"), row.get("country")) if part]
        location = ", ".join(places) or "-"
        return f"| {cls._md_cell(name)} | {cls._md_cell(budget)} | {cls._md_cell(location)} |"

    def _render_table(
        self,
        neo4j: Any,
        query_key: str,
        headers: Any,
        format_row: Any,
        empty_label: Optional[str] = None,
    ) -> List[str]:
        """Run one explorer query and return it as Markdown table lines.

        Args:
            neo4j: Open service object exposing ``query(cypher)``.
            query_key: Key into ``GRAPH_EXPLORER_QUERIES``.
            headers: Column titles.
            format_row: Callable turning one result row into a table line.
            empty_label: First-cell text of a placeholder row emitted when
                the query returns no rows; ``None`` emits no placeholder.

        Returns:
            Header, separator and body lines. If the query or formatting
            fails, the body is a single ``(unable to fetch)`` row so that
            the other sections can still be shown.
        """
        lines = self._table_header(headers)
        filler = " | ".join(["-"] * (len(headers) - 1))
        try:
            results = neo4j.query(self.GRAPH_EXPLORER_QUERIES[query_key])
            body = [format_row(row) for row in results]
        except Exception:
            # Best-effort section: keep the page usable but leave a trace on
            # the server console instead of hiding the failure.
            import traceback

            traceback.print_exc()
            return lines + [f"| (unable to fetch) | {filler} |"]
        if not body and empty_label is not None:
            body = [f"| {empty_label} | {filler} |"]
        return lines + body

    def _explore_graph_action(self) -> str:
        """Return Markdown with node, relationship and sample-project tables.

        Returns a plain error message instead when credentials are missing
        or the database cannot be reached.
        """
        if not self._has_neo4j_credentials():
            return "Database credentials not configured."
        try:
            with self._open_neo4j() as neo4j:
                output = ["### Node Distribution\n"]
                output += self._render_table(
                    neo4j,
                    "node_labels",
                    ("Label", "Count"),
                    lambda row: self._format_count_row(row, "label"),
                )

                output.append("\n### Relationship Distribution\n")
                output += self._render_table(
                    neo4j,
                    "relationship_types",
                    ("Type", "Count"),
                    lambda row: self._format_count_row(row, "relationshipType"),
                )

                output.append("\n### Sample Projects\n")
                output += self._render_table(
                    neo4j,
                    "sample_projects",
                    ("Project", "Budget", "Location"),
                    self._format_project_row,
                    empty_label="(no projects found)",
                )
                return "\n".join(output)
        except Neo4jConnectionError as e:
            return f"Connection error: {e}"
        except Exception as e:
            return f"Failed to fetch graph data: {e}"

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def build(self) -> gr.Blocks:
        """Assemble the Gradio layout and bind the button handlers.

        NOTE(review): the nesting of groups/rows below is reconstructed from
        the component arguments (``scale`` implies a row); confirm it matches
        the intended layout.
        """
        with gr.Blocks(title=self.TITLE) as demo:
            gr.Markdown(self.DESCRIPTION)
            state = gr.State(value=None)

            with gr.Group():
                pdfs = gr.File(
                    label="Upload Documents",
                    file_types=[".pdf"],
                    file_count="multiple",
                )

                # The checkbox group always exists so the click handler has a
                # stable input list; it is hidden when no samples are bundled.
                sample_file_choices = self._get_sample_files()
                has_samples = bool(sample_file_choices)
                sample_files = gr.CheckboxGroup(
                    label="Or Select from Sample Reports" if has_samples else "Sample Reports",
                    choices=sample_file_choices,
                    value=[],
                    visible=has_samples,
                )

                with gr.Row():
                    clear_toggle = gr.Radio(
                        label="Reset graph before ingestion",
                        choices=["Yes", "No"],
                        value="Yes",
                        scale=1,
                    )

                with gr.Row():
                    ingest_btn = gr.Button("Run Ingestion Pipeline", variant="primary", scale=2)
                    clear_btn = gr.Button("Reset Graph", variant="secondary", scale=1)

                progress_bar = gr.Slider(
                    label="Progress",
                    minimum=0,
                    maximum=1,
                    value=0,
                    interactive=False,
                    visible=False,
                )
                ingest_status = gr.Markdown()

            gr.Markdown("---")

            with gr.Group():
                gr.Markdown("### Query Interface")
                question = gr.Textbox(
                    label="Natural Language Query",
                    placeholder="e.g., Compare budget allocations and milestone timelines across projects",
                    lines=2,
                )
                ask_btn = gr.Button("Execute Query", variant="primary")
                answer = gr.Markdown(label="Response")

            with gr.Accordion("Graph Explorer", open=False):
                gr.Markdown("View database contents without direct access to credentials.")
                explore_btn = gr.Button("Load Graph Statistics", variant="secondary")
                graph_stats = gr.Markdown()

            with gr.Accordion("System Configuration", open=False):
                gr.Markdown(self._get_config_status())

            ingest_btn.click(
                fn=self._ingest_action,
                inputs=[pdfs, sample_files, clear_toggle],
                outputs=[ingest_status, progress_bar, state],
            )
            clear_btn.click(
                fn=self._clear_action,
                inputs=[],
                outputs=[ingest_status],
            )
            ask_btn.click(
                fn=self._ask_action,
                inputs=[question, state],
                outputs=[answer],
            )
            explore_btn.click(
                fn=self._explore_graph_action,
                inputs=[],
                outputs=[graph_stats],
            )
        return demo

    def _get_config_status(self) -> str:
        """Return a Markdown table summarising which services are configured.

        The Neo4j row requires both URI and password, the same condition the
        database actions check before connecting. No connection is attempted
        here; the status reflects configuration only.
        """

        def status(value: Any) -> str:
            return "Connected" if value else "Not configured"

        rows = [
            "",
            "| Component | Status |",
            "|-----------|--------|",
            f"| LLM Provider (Together AI) | {status(self.settings.together_ai.api_key)} |",
            f"| Graph Database (Neo4j) | {status(self._has_neo4j_credentials())} |",
            "",
        ]
        return "\n".join(rows)

    def launch(self, **kwargs) -> None:
        """Build the UI and start the Gradio server.

        Args:
            **kwargs: Forwarded to ``Blocks.launch``. ``server_name``,
                ``server_port`` and ``theme`` default to the configured
                host, the configured port and ``gr.themes.Soft()``; a value
                supplied by the caller always wins.
        """
        options = dict(kwargs)
        options.setdefault("server_name", self.settings.app.host)
        options.setdefault("server_port", self.settings.app.port)
        # Only fill in the default theme when the caller did not pass one;
        # passing it twice would raise a duplicate-keyword TypeError.
        if "theme" not in options:
            options["theme"] = gr.themes.Soft()
        demo = self.build()
        demo.launch(**options)
|