File size: 13,230 Bytes
8c35759
 
 
 
a48023f
 
8c35759
 
 
 
 
 
 
 
 
 
 
 
 
a48023f
 
8c35759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a48023f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c35759
 
a48023f
 
 
 
 
 
 
8c35759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a48023f
8c35759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a48023f
8c35759
 
 
 
a48023f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c35759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a48023f
8c35759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
"""Gradio web interface for Project Intelligence Hub."""

from __future__ import annotations

from pathlib import Path
from typing import Any, List, Optional

import gradio as gr

from src.config.settings import Settings, Neo4jConfig, TogetherAIConfig
from src.models.state import AppState
from src.services.builder import GraphRAGBuilder
from src.services.answerer import QueryAnswerer
from src.services.neo4j_service import Neo4jService, Neo4jConnectionError


class GradioApp:
    """Gradio controller for ingestion and query-time interactions.

    Builds the Blocks UI, runs document ingestion through ``GraphRAGBuilder``,
    answers questions through ``QueryAnswerer``, and inspects or clears the
    Neo4j graph through ``Neo4jService``.
    """

    # Directory of bundled example PDFs, located three directory levels above
    # this file. It may be absent; callers check for existence before use.
    SAMPLE_REPORTS_DIR = Path(__file__).parent.parent.parent / "sample_project_reports"

    # Title passed to ``gr.Blocks`` in ``build``.
    TITLE = "Project Intelligence Hub"
    # Markdown rendered at the top of the page by ``build``.
    DESCRIPTION = """
# Project Intelligence Hub

Transform unstructured PDF reports into a queryable knowledge graph.

1. **Ingest** — Upload documents to extract entities and relationships
2. **Index** — Build vector embeddings and graph structure
3. **Query** — Retrieve answers via hybrid graph + semantic search
"""

    # Read-only Cypher statements executed by ``_explore_graph_action``.
    GRAPH_EXPLORER_QUERIES = {
        # Node count per label, most populous label first.
        "node_labels": """
            CALL db.labels() YIELD label
            CALL { WITH label MATCH (n) WHERE label IN labels(n) RETURN count(n) AS cnt }
            RETURN label, cnt ORDER BY cnt DESC
        """,
        # Relationship count per type, most frequent type first.
        "relationship_types": """
            CALL db.relationshipTypes() YIELD relationshipType
            CALL { WITH relationshipType MATCH ()-[r]->() WHERE type(r) = relationshipType RETURN count(r) AS cnt }
            RETURN relationshipType, cnt ORDER BY cnt DESC
        """,
        # Up to 10 projects with their budget and location; both are optional
        # matches, so those columns can be null.
        "sample_projects": """
            MATCH (p:Project)
            OPTIONAL MATCH (p)-[:HAS_BUDGET]->(b:Budget)
            OPTIONAL MATCH (p)-[:LOCATED_IN]->(l:Location)
            RETURN p.name AS project, b.amount AS budget, b.currency AS currency,
                   l.city AS city, l.country AS country
            LIMIT 10
        """,
    }

    def __init__(self, settings: Settings | None = None) -> None:
        """Create the controller.

        Args:
            settings: Application settings. When omitted (or falsy) they are
                loaded with ``Settings.from_env()``.
        """
        if not settings:
            settings = Settings.from_env()
        self.settings = settings
        self.answerer = QueryAnswerer()
        # Surface missing credentials on the console right away.
        self._validate_settings()

    def _validate_settings(self) -> None:
        issues = []
        if not self.settings.together_ai.api_key:
            issues.append("TOGETHER_API_KEY not set in .env")
        if not self.settings.neo4j.uri:
            issues.append("NEO4J_URI not set in .env")
        if not self.settings.neo4j.password:
            issues.append("NEO4J_PASSWORD not set in .env")

        if issues:
            print("Configuration warnings:")
            for issue in issues:
                print(f"  - {issue}")

    def _get_sample_files(self) -> List[str]:
        """Return list of available sample PDF files."""
        if not self.SAMPLE_REPORTS_DIR.exists():
            return []
        return sorted([
            f.name for f in self.SAMPLE_REPORTS_DIR.glob("*.pdf")
        ])

    def _resolve_sample_files(self, selected_names: Optional[List[str]]) -> List[str]:
        """Convert selected sample file names to full paths."""
        if not selected_names:
            return []
        return [
            str(self.SAMPLE_REPORTS_DIR / name)
            for name in selected_names
            if (self.SAMPLE_REPORTS_DIR / name).exists()
        ]

    def _ingest_action(self, pdf_files: List[Any], sample_files: Optional[List[str]], clear_db: str):
        """Run the ingestion pipeline as a generator of UI updates.

        Each yielded item is a ``(status_text, progress_update, state)`` triple
        matching the outputs wired in ``build`` (status Markdown, progress
        slider, session state).

        Args:
            pdf_files: Files uploaded through the UI; may be ``None`` or empty.
            sample_files: Names of bundled sample PDFs selected in the UI.
            clear_db: ``"Yes"`` to clear the graph before ingesting; any other
                value leaves existing data in place.
        """
        reset_requested = clear_db == "Yes"

        # Uploaded files first, then whichever selected samples resolve to paths.
        documents = [*pdf_files] if pdf_files else []
        documents += self._resolve_sample_files(sample_files)

        # Preconditions, checked in order; the first failure is reported and
        # ends the run.
        problem = None
        if not documents:
            problem = "No documents provided. Upload or select at least one PDF."
        elif not self.settings.together_ai.api_key:
            problem = "Missing API credentials: TOGETHER_API_KEY"
        elif not (self.settings.neo4j.uri and self.settings.neo4j.password):
            problem = "Missing database credentials: NEO4J_URI or NEO4J_PASSWORD"

        if problem is not None:
            yield problem, gr.update(value=0, visible=True), None
            return

        llm_settings = self.settings.together_ai
        together_config = TogetherAIConfig(
            api_key=llm_settings.api_key,
            chat_model=llm_settings.chat_model,
            embedding_model=llm_settings.embedding_model,
        )

        graph_settings = self.settings.neo4j
        neo4j_config = Neo4jConfig(
            uri=graph_settings.uri,
            username=graph_settings.username,
            password=graph_settings.password,
            database=graph_settings.database,
        )

        try:
            pipeline = GraphRAGBuilder(together_config=together_config)
            updates = pipeline.ingest_with_progress(
                pdf_files=documents,
                neo4j_config=neo4j_config,
                clear_db=reset_requested,
                skip_llm_extraction=True,
            )

            latest_state = None
            for message, fraction, snapshot in updates:
                yield message, gr.update(value=fraction, visible=True), snapshot
                # Remember the most recent non-None state the builder emitted.
                if snapshot is not None:
                    latest_state = snapshot

            if latest_state:
                # Final update: hide the progress bar and publish the state.
                yield "Pipeline complete. Ready for queries.", gr.update(value=1.0, visible=False), latest_state

        except ValueError as exc:
            yield f"Configuration error: {exc}", gr.update(value=0, visible=True), None
        except Exception as exc:
            import traceback
            traceback.print_exc()
            yield f"Pipeline failed: {exc}", gr.update(value=0, visible=True), None

    def _clear_action(self) -> str:
        if not self.settings.neo4j.uri or not self.settings.neo4j.password:
            return "Database credentials not configured."

        try:
            with Neo4jService(
                uri=self.settings.neo4j.uri,
                user=self.settings.neo4j.username,
                password=self.settings.neo4j.password,
                database=self.settings.neo4j.database,
            ) as neo4j:
                neo4j.clear()
                return "Graph database cleared. All nodes and relationships removed."
        except Neo4jConnectionError as e:
            return f"Connection error: {e}"
        except Exception as e:
            return f"Operation failed: {e}"

    def _ask_action(self, question: str, state: AppState) -> str:
        return self.answerer.answer(question, state)

    def _explore_graph_action(self) -> str:
        if not self.settings.neo4j.uri or not self.settings.neo4j.password:
            return "Database credentials not configured."

        try:
            with Neo4jService(
                uri=self.settings.neo4j.uri,
                user=self.settings.neo4j.username,
                password=self.settings.neo4j.password,
                database=self.settings.neo4j.database,
            ) as neo4j:
                output = []

                # Node counts by label
                output.append("### Node Distribution\n")
                output.append("| Label | Count |")
                output.append("|-------|-------|")
                try:
                    results = neo4j.query(self.GRAPH_EXPLORER_QUERIES["node_labels"])
                    for row in results:
                        output.append(f"| {row['label']} | {row['cnt']:,} |")
                except Exception:
                    output.append("| (unable to fetch) | - |")

                # Relationship counts
                output.append("\n### Relationship Distribution\n")
                output.append("| Type | Count |")
                output.append("|------|-------|")
                try:
                    results = neo4j.query(self.GRAPH_EXPLORER_QUERIES["relationship_types"])
                    for row in results:
                        output.append(f"| {row['relationshipType']} | {row['cnt']:,} |")
                except Exception:
                    output.append("| (unable to fetch) | - |")

                # Sample projects
                output.append("\n### Sample Projects\n")
                output.append("| Project | Budget | Location |")
                output.append("|---------|--------|----------|")
                try:
                    results = neo4j.query(self.GRAPH_EXPLORER_QUERIES["sample_projects"])
                    if not results:
                        output.append("| (no projects found) | - | - |")
                    for row in results:
                        name = row.get('project') or '-'
                        budget = f"{row.get('budget') or '-'} {row.get('currency') or ''}".strip()
                        location = f"{row.get('city') or ''}, {row.get('country') or ''}".strip(", ")
                        output.append(f"| {name} | {budget} | {location or '-'} |")
                except Exception:
                    output.append("| (unable to fetch) | - | - |")

                return "\n".join(output)

        except Neo4jConnectionError as e:
            return f"Connection error: {e}"
        except Exception as e:
            return f"Failed to fetch graph data: {e}"

    def build(self) -> gr.Blocks:
        """Assemble the Gradio layout and wire every button to its handler.

        Returns:
            The constructed ``gr.Blocks`` app; it is not launched here.
        """
        with gr.Blocks(title=self.TITLE) as app:
            gr.Markdown(self.DESCRIPTION)

            # Session value written by ingestion and read by queries; starts as None.
            session_state = gr.State(value=None)

            # ---- Ingestion panel ----
            with gr.Group():
                upload_box = gr.File(
                    file_count="multiple",
                    file_types=[".pdf"],
                    label="Upload Documents",
                )

                # The picker is always created so the ingest handler keeps a
                # fixed input list; it is hidden when no samples are bundled.
                bundled = self._get_sample_files()
                has_bundled = bool(bundled)
                sample_picker = gr.CheckboxGroup(
                    label="Or Select from Sample Reports" if has_bundled else "Sample Reports",
                    choices=bundled,
                    value=[],
                    visible=has_bundled,
                )

                with gr.Row():
                    reset_choice = gr.Radio(
                        choices=["Yes", "No"],
                        value="Yes",
                        label="Reset graph before ingestion",
                        scale=1,
                    )

                with gr.Row():
                    run_button = gr.Button("Run Ingestion Pipeline", variant="primary", scale=2)
                    reset_button = gr.Button("Reset Graph", variant="secondary", scale=1)

                # Read-only slider used as a progress bar; shown only while ingesting.
                progress = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0,
                    label="Progress",
                    interactive=False,
                    visible=False,
                )

                status_box = gr.Markdown()

            gr.Markdown("---")

            # ---- Query panel ----
            with gr.Group():
                gr.Markdown("### Query Interface")
                query_box = gr.Textbox(
                    lines=2,
                    label="Natural Language Query",
                    placeholder="e.g., Compare budget allocations and milestone timelines across projects",
                )
                query_button = gr.Button("Execute Query", variant="primary")
                answer_box = gr.Markdown(label="Response")

            # ---- Collapsible panels ----
            with gr.Accordion("Graph Explorer", open=False):
                gr.Markdown("View database contents without direct access to credentials.")
                stats_button = gr.Button("Load Graph Statistics", variant="secondary")
                stats_box = gr.Markdown()

            with gr.Accordion("System Configuration", open=False):
                gr.Markdown(self._get_config_status())

            # ---- Event wiring ----
            run_button.click(
                fn=self._ingest_action,
                inputs=[upload_box, sample_picker, reset_choice],
                outputs=[status_box, progress, session_state],
            )

            reset_button.click(
                fn=self._clear_action,
                inputs=[],
                outputs=[status_box],
            )

            query_button.click(
                fn=self._ask_action,
                inputs=[query_box, session_state],
                outputs=[answer_box],
            )

            stats_button.click(
                fn=self._explore_graph_action,
                inputs=[],
                outputs=[stats_box],
            )

        return app

    def _get_config_status(self) -> str:
        def status(value: str) -> str:
            return "Connected" if value else "Not configured"

        return f"""
| Component | Status |
|-----------|--------|
| LLM Provider (Together AI) | {status(self.settings.together_ai.api_key)} |
| Graph Database (Neo4j) | {status(self.settings.neo4j.uri)} |
"""

    def launch(self, **kwargs) -> None:
        demo = self.build()
        demo.launch(
            server_name=kwargs.get("server_name", self.settings.app.host),
            server_port=kwargs.get("server_port", self.settings.app.port),
            theme=gr.themes.Soft(),
            **{k: v for k, v in kwargs.items() if k not in ("server_name", "server_port")},
        )