Spaces:
Sleeping
Sleeping
diogo.rodrigues.silva committed on
Commit ·
3220a1d
1
Parent(s): 0c2ed96
fix download
Browse files
- __pycache__/app.cpython-310.pyc +0 -0
- app.py +61 -17
__pycache__/app.cpython-310.pyc
ADDED
|
Binary file (10.6 kB). View file
|
|
|
app.py
CHANGED
|
@@ -112,10 +112,14 @@ def _validate_upload_file(path: Path, allowed_suffixes: set[str], max_mb: int) -
|
|
| 112 |
|
| 113 |
def parse_files(reference_files: list[str] | None):
|
| 114 |
if not reference_files:
|
| 115 |
-
return "Upload at least one reference file.", None, None
|
| 116 |
|
| 117 |
if len(reference_files) > MAX_UPLOAD_FILES:
|
| 118 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
input_paths = [Path(p) for p in reference_files]
|
| 121 |
for p in input_paths:
|
|
@@ -125,17 +129,21 @@ def parse_files(reference_files: list[str] | None):
|
|
| 125 |
max_mb=MAX_UPLOAD_FILE_MB,
|
| 126 |
)
|
| 127 |
if validation_error:
|
| 128 |
-
return validation_error, None, None
|
| 129 |
|
| 130 |
run_dir = _new_run_dir()
|
| 131 |
parsed_output = run_dir / f"parsed_{_timestamp_slug()}.xlsx"
|
| 132 |
|
| 133 |
df = reference_parser.parse_references(input_paths=input_paths, output_path=parsed_output)
|
| 134 |
if df.empty:
|
| 135 |
-
return "No records found in the uploaded files.", None, None
|
| 136 |
|
| 137 |
msg = f"Parsed {len(df)} deduplicated records."
|
| 138 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
|
| 141 |
def screen_excel(
|
|
@@ -151,14 +159,23 @@ def screen_excel(
|
|
| 151 |
):
|
| 152 |
missing = _missing_secrets()
|
| 153 |
if missing:
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
if not parsed_excel_path:
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
parsed_path = Path(parsed_excel_path)
|
| 160 |
if not parsed_path.exists():
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
screened_output = parsed_path.parent / f"screened_{_timestamp_slug()}.xlsx"
|
| 164 |
criteria_path = None
|
|
@@ -171,7 +188,8 @@ def screen_excel(
|
|
| 171 |
max_mb=MAX_CRITERIA_FILE_MB,
|
| 172 |
)
|
| 173 |
if validation_error:
|
| 174 |
-
|
|
|
|
| 175 |
criteria_path = candidate
|
| 176 |
else:
|
| 177 |
inclusion = _parse_criteria_lines(criteria_inclusion_text)
|
|
@@ -179,11 +197,20 @@ def screen_excel(
|
|
| 179 |
topic = (criteria_topic or "").strip()
|
| 180 |
|
| 181 |
if not topic:
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
| 183 |
if not inclusion:
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
| 185 |
if not exclusion:
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
criteria_obj = {
|
| 189 |
"topic": topic,
|
|
@@ -234,6 +261,8 @@ def screen_excel(
|
|
| 234 |
worker = threading.Thread(target=_run_screening, daemon=True)
|
| 235 |
worker.start()
|
| 236 |
|
|
|
|
|
|
|
| 237 |
while worker.is_alive():
|
| 238 |
elapsed_s = int(time.perf_counter() - started)
|
| 239 |
eta_s = int(progress_state["eta_seconds"])
|
|
@@ -253,15 +282,20 @@ def screen_excel(
|
|
| 253 |
else:
|
| 254 |
status = f"Initializing client... Elapsed {elapsed_min:02d}:{elapsed_sec:02d}"
|
| 255 |
|
| 256 |
-
yield status, None
|
| 257 |
time.sleep(1)
|
| 258 |
|
| 259 |
if worker_error["exc"] is not None:
|
| 260 |
-
yield f"Screening failed: {worker_error['exc']}",
|
|
|
|
|
|
|
| 261 |
return
|
| 262 |
|
| 263 |
progress(1, desc="Screening complete.")
|
| 264 |
-
yield
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
|
| 267 |
def build_app() -> gr.Blocks:
|
|
@@ -290,10 +324,20 @@ def build_app() -> gr.Blocks:
|
|
| 290 |
)
|
| 291 |
parse_btn = gr.Button("1) Run Parser", variant="primary")
|
| 292 |
parse_status = gr.Textbox(label="Parser Status", interactive=False)
|
| 293 |
-
parsed_excel_download = gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
screen_btn = gr.Button("2) Screen Excel", variant="primary")
|
| 295 |
screen_status = gr.Textbox(label="Screening Status", interactive=False)
|
| 296 |
-
screened_excel_download = gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
with gr.Column():
|
| 299 |
criteria_file = gr.File(
|
|
|
|
| 112 |
|
| 113 |
def parse_files(reference_files: list[str] | None):
|
| 114 |
if not reference_files:
|
| 115 |
+
return "Upload at least one reference file.", None, gr.update(value=None, visible=False, interactive=False)
|
| 116 |
|
| 117 |
if len(reference_files) > MAX_UPLOAD_FILES:
|
| 118 |
+
return (
|
| 119 |
+
f"Too many files. Maximum allowed is {MAX_UPLOAD_FILES}.",
|
| 120 |
+
None,
|
| 121 |
+
gr.update(value=None, visible=False, interactive=False),
|
| 122 |
+
)
|
| 123 |
|
| 124 |
input_paths = [Path(p) for p in reference_files]
|
| 125 |
for p in input_paths:
|
|
|
|
| 129 |
max_mb=MAX_UPLOAD_FILE_MB,
|
| 130 |
)
|
| 131 |
if validation_error:
|
| 132 |
+
return validation_error, None, gr.update(value=None, visible=False, interactive=False)
|
| 133 |
|
| 134 |
run_dir = _new_run_dir()
|
| 135 |
parsed_output = run_dir / f"parsed_{_timestamp_slug()}.xlsx"
|
| 136 |
|
| 137 |
df = reference_parser.parse_references(input_paths=input_paths, output_path=parsed_output)
|
| 138 |
if df.empty:
|
| 139 |
+
return "No records found in the uploaded files.", None, gr.update(value=None, visible=False, interactive=False)
|
| 140 |
|
| 141 |
msg = f"Parsed {len(df)} deduplicated records."
|
| 142 |
+
return (
|
| 143 |
+
msg,
|
| 144 |
+
str(parsed_output),
|
| 145 |
+
gr.update(value=str(parsed_output), visible=True, interactive=True),
|
| 146 |
+
)
|
| 147 |
|
| 148 |
|
| 149 |
def screen_excel(
|
|
|
|
| 159 |
):
|
| 160 |
missing = _missing_secrets()
|
| 161 |
if missing:
|
| 162 |
+
yield "Missing required Space secrets: " + ", ".join(missing), gr.update(
|
| 163 |
+
value=None, visible=False, interactive=False
|
| 164 |
+
)
|
| 165 |
+
return
|
| 166 |
|
| 167 |
if not parsed_excel_path:
|
| 168 |
+
yield "Run parser first to generate a parsed Excel file.", gr.update(
|
| 169 |
+
value=None, visible=False, interactive=False
|
| 170 |
+
)
|
| 171 |
+
return
|
| 172 |
|
| 173 |
parsed_path = Path(parsed_excel_path)
|
| 174 |
if not parsed_path.exists():
|
| 175 |
+
yield "Parsed Excel file was not found. Please run parser again.", gr.update(
|
| 176 |
+
value=None, visible=False, interactive=False
|
| 177 |
+
)
|
| 178 |
+
return
|
| 179 |
|
| 180 |
screened_output = parsed_path.parent / f"screened_{_timestamp_slug()}.xlsx"
|
| 181 |
criteria_path = None
|
|
|
|
| 188 |
max_mb=MAX_CRITERIA_FILE_MB,
|
| 189 |
)
|
| 190 |
if validation_error:
|
| 191 |
+
yield validation_error, gr.update(value=None, visible=False, interactive=False)
|
| 192 |
+
return
|
| 193 |
criteria_path = candidate
|
| 194 |
else:
|
| 195 |
inclusion = _parse_criteria_lines(criteria_inclusion_text)
|
|
|
|
| 197 |
topic = (criteria_topic or "").strip()
|
| 198 |
|
| 199 |
if not topic:
|
| 200 |
+
yield "Provide a criteria topic (or upload criteria file).", gr.update(
|
| 201 |
+
value=None, visible=False, interactive=False
|
| 202 |
+
)
|
| 203 |
+
return
|
| 204 |
if not inclusion:
|
| 205 |
+
yield "Provide at least one inclusion criterion (or upload criteria file).", gr.update(
|
| 206 |
+
value=None, visible=False, interactive=False
|
| 207 |
+
)
|
| 208 |
+
return
|
| 209 |
if not exclusion:
|
| 210 |
+
yield "Provide at least one exclusion criterion (or upload criteria file).", gr.update(
|
| 211 |
+
value=None, visible=False, interactive=False
|
| 212 |
+
)
|
| 213 |
+
return
|
| 214 |
|
| 215 |
criteria_obj = {
|
| 216 |
"topic": topic,
|
|
|
|
| 261 |
worker = threading.Thread(target=_run_screening, daemon=True)
|
| 262 |
worker.start()
|
| 263 |
|
| 264 |
+
yield "Preparing screening...", gr.update(value=None, visible=False, interactive=False)
|
| 265 |
+
|
| 266 |
while worker.is_alive():
|
| 267 |
elapsed_s = int(time.perf_counter() - started)
|
| 268 |
eta_s = int(progress_state["eta_seconds"])
|
|
|
|
| 282 |
else:
|
| 283 |
status = f"Initializing client... Elapsed {elapsed_min:02d}:{elapsed_sec:02d}"
|
| 284 |
|
| 285 |
+
yield status, gr.update(value=None, visible=False, interactive=False)
|
| 286 |
time.sleep(1)
|
| 287 |
|
| 288 |
if worker_error["exc"] is not None:
|
| 289 |
+
yield f"Screening failed: {worker_error['exc']}", gr.update(
|
| 290 |
+
value=None, visible=False, interactive=False
|
| 291 |
+
)
|
| 292 |
return
|
| 293 |
|
| 294 |
progress(1, desc="Screening complete.")
|
| 295 |
+
yield (
|
| 296 |
+
f"Screening complete: {screened_output.name}",
|
| 297 |
+
gr.update(value=str(screened_output), visible=True, interactive=True),
|
| 298 |
+
)
|
| 299 |
|
| 300 |
|
| 301 |
def build_app() -> gr.Blocks:
|
|
|
|
| 324 |
)
|
| 325 |
parse_btn = gr.Button("1) Run Parser", variant="primary")
|
| 326 |
parse_status = gr.Textbox(label="Parser Status", interactive=False)
|
| 327 |
+
parsed_excel_download = gr.DownloadButton(
|
| 328 |
+
"Download Parsed Excel",
|
| 329 |
+
value=None,
|
| 330 |
+
visible=False,
|
| 331 |
+
interactive=False,
|
| 332 |
+
)
|
| 333 |
screen_btn = gr.Button("2) Screen Excel", variant="primary")
|
| 334 |
screen_status = gr.Textbox(label="Screening Status", interactive=False)
|
| 335 |
+
screened_excel_download = gr.DownloadButton(
|
| 336 |
+
"Download Screened Excel",
|
| 337 |
+
value=None,
|
| 338 |
+
visible=False,
|
| 339 |
+
interactive=False,
|
| 340 |
+
)
|
| 341 |
|
| 342 |
with gr.Column():
|
| 343 |
criteria_file = gr.File(
|