Spaces:
Sleeping
Sleeping
Upload 28 files
Browse files
sample_project_reports/FREEPORT GRASSROOT PETROLEUM REFINERY.pdf
ADDED
|
Binary file (10.3 kB). View file
|
|
|
sample_project_reports/RACINE COAL 1000MW AMP GRASSROOT GENERATING STATION.pdf
ADDED
|
Binary file (8.42 kB). View file
|
|
|
sample_project_reports/XIANYANG GRASSROOT POLYSILICON MANUFACTURING PLANT.pdf
ADDED
|
Binary file (10.8 kB). View file
|
|
|
src/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
src/ui/gradio_app.py
CHANGED
|
@@ -2,7 +2,8 @@
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
-
from
|
|
|
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
|
|
@@ -16,6 +17,8 @@ from src.services.neo4j_service import Neo4jService, Neo4jConnectionError
|
|
| 16 |
class GradioApp:
|
| 17 |
"""Gradio controller for ingestion and query-time interactions."""
|
| 18 |
|
|
|
|
|
|
|
| 19 |
TITLE = "Project Intelligence Hub"
|
| 20 |
DESCRIPTION = """
|
| 21 |
# Project Intelligence Hub
|
|
@@ -67,11 +70,34 @@ Transform unstructured PDF reports into a queryable knowledge graph.
|
|
| 67 |
for issue in issues:
|
| 68 |
print(f" - {issue}")
|
| 69 |
|
| 70 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
clear_db_bool = clear_db == "Yes"
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
return
|
| 76 |
|
| 77 |
if not self.settings.together_ai.api_key:
|
|
@@ -100,7 +126,7 @@ Transform unstructured PDF reports into a queryable knowledge graph.
|
|
| 100 |
|
| 101 |
final_state = None
|
| 102 |
for status, progress, state in builder.ingest_with_progress(
|
| 103 |
-
pdf_files=
|
| 104 |
neo4j_config=neo4j_config,
|
| 105 |
clear_db=clear_db_bool,
|
| 106 |
skip_llm_extraction=True,
|
|
@@ -206,11 +232,26 @@ Transform unstructured PDF reports into a queryable knowledge graph.
|
|
| 206 |
|
| 207 |
with gr.Group():
|
| 208 |
pdfs = gr.File(
|
| 209 |
-
label="
|
| 210 |
file_types=[".pdf"],
|
| 211 |
file_count="multiple",
|
| 212 |
)
|
| 213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
with gr.Row():
|
| 215 |
clear_toggle = gr.Radio(
|
| 216 |
label="Reset graph before ingestion",
|
|
@@ -256,7 +297,7 @@ Transform unstructured PDF reports into a queryable knowledge graph.
|
|
| 256 |
|
| 257 |
ingest_btn.click(
|
| 258 |
fn=self._ingest_action,
|
| 259 |
-
inputs=[pdfs, clear_toggle],
|
| 260 |
outputs=[ingest_status, progress_bar, state],
|
| 261 |
)
|
| 262 |
|
|
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any, List, Optional
|
| 7 |
|
| 8 |
import gradio as gr
|
| 9 |
|
|
|
|
| 17 |
class GradioApp:
|
| 18 |
"""Gradio controller for ingestion and query-time interactions."""
|
| 19 |
|
| 20 |
+
SAMPLE_REPORTS_DIR = Path(__file__).parent.parent.parent / "sample_project_reports"
|
| 21 |
+
|
| 22 |
TITLE = "Project Intelligence Hub"
|
| 23 |
DESCRIPTION = """
|
| 24 |
# Project Intelligence Hub
|
|
|
|
| 70 |
for issue in issues:
|
| 71 |
print(f" - {issue}")
|
| 72 |
|
| 73 |
+
def _get_sample_files(self) -> List[str]:
|
| 74 |
+
"""Return list of available sample PDF files."""
|
| 75 |
+
if not self.SAMPLE_REPORTS_DIR.exists():
|
| 76 |
+
return []
|
| 77 |
+
return sorted([
|
| 78 |
+
f.name for f in self.SAMPLE_REPORTS_DIR.glob("*.pdf")
|
| 79 |
+
])
|
| 80 |
+
|
| 81 |
+
def _resolve_sample_files(self, selected_names: Optional[List[str]]) -> List[str]:
|
| 82 |
+
"""Convert selected sample file names to full paths."""
|
| 83 |
+
if not selected_names:
|
| 84 |
+
return []
|
| 85 |
+
return [
|
| 86 |
+
str(self.SAMPLE_REPORTS_DIR / name)
|
| 87 |
+
for name in selected_names
|
| 88 |
+
if (self.SAMPLE_REPORTS_DIR / name).exists()
|
| 89 |
+
]
|
| 90 |
+
|
| 91 |
+
def _ingest_action(self, pdf_files: List[Any], sample_files: Optional[List[str]], clear_db: str):
|
| 92 |
clear_db_bool = clear_db == "Yes"
|
| 93 |
|
| 94 |
+
# Combine uploaded files with selected sample files
|
| 95 |
+
all_files = list(pdf_files) if pdf_files else []
|
| 96 |
+
sample_paths = self._resolve_sample_files(sample_files)
|
| 97 |
+
all_files.extend(sample_paths)
|
| 98 |
+
|
| 99 |
+
if not all_files:
|
| 100 |
+
yield "No documents provided. Upload or select at least one PDF.", gr.update(value=0, visible=True), None
|
| 101 |
return
|
| 102 |
|
| 103 |
if not self.settings.together_ai.api_key:
|
|
|
|
| 126 |
|
| 127 |
final_state = None
|
| 128 |
for status, progress, state in builder.ingest_with_progress(
|
| 129 |
+
pdf_files=all_files,
|
| 130 |
neo4j_config=neo4j_config,
|
| 131 |
clear_db=clear_db_bool,
|
| 132 |
skip_llm_extraction=True,
|
|
|
|
| 232 |
|
| 233 |
with gr.Group():
|
| 234 |
pdfs = gr.File(
|
| 235 |
+
label="Upload Documents",
|
| 236 |
file_types=[".pdf"],
|
| 237 |
file_count="multiple",
|
| 238 |
)
|
| 239 |
|
| 240 |
+
sample_file_choices = self._get_sample_files()
|
| 241 |
+
if sample_file_choices:
|
| 242 |
+
sample_files = gr.CheckboxGroup(
|
| 243 |
+
label="Or Select from Sample Reports",
|
| 244 |
+
choices=sample_file_choices,
|
| 245 |
+
value=[],
|
| 246 |
+
)
|
| 247 |
+
else:
|
| 248 |
+
sample_files = gr.CheckboxGroup(
|
| 249 |
+
label="Sample Reports",
|
| 250 |
+
choices=[],
|
| 251 |
+
value=[],
|
| 252 |
+
visible=False,
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
with gr.Row():
|
| 256 |
clear_toggle = gr.Radio(
|
| 257 |
label="Reset graph before ingestion",
|
|
|
|
| 297 |
|
| 298 |
ingest_btn.click(
|
| 299 |
fn=self._ingest_action,
|
| 300 |
+
inputs=[pdfs, sample_files, clear_toggle],
|
| 301 |
outputs=[ingest_status, progress_bar, state],
|
| 302 |
)
|
| 303 |
|