AbhijitClemson committed on
Commit
bb7e19a
·
verified ·
1 Parent(s): 525a336

Update page_files/Upload_Data.py

Browse files
Files changed (1) hide show
  1. page_files/Upload_Data.py +1240 -863
page_files/Upload_Data.py CHANGED
@@ -1,887 +1,1264 @@
1
- import os
2
- import json
3
- import tempfile
4
- import base64
5
-
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import fitz # PyMuPDF
 
7
  import pandas as pd
 
8
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  from data_loader import insert_material_rows
11
- from page_files.categorized.Backend.upload_backend import (
12
- call_gemini_from_bytes,
13
- convert_to_dataframe,
14
- create_zip,
15
- extract_images,
16
- save_matched_images,
17
- save_single_image_with_property,
18
- )
19
- def inject_upload_page_styles():
20
- st.markdown(
21
- """
22
- <style>
23
- @import url("https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;600;700;800&display=swap");
24
-
25
- [data-testid="stHeader"] {
26
- display: none !important;
27
- }
28
-
29
- .stApp {
30
- background: #f3f6fb !important;
31
- }
32
-
33
- html, body, [class*="css"] {
34
- font-family: "DM Sans", sans-serif !important;
35
- }
36
-
37
- .block-container {
38
- max-width: 980px !important;
39
- padding-top: 1rem !important;
40
- padding-bottom: 2rem !important;
41
- }
42
- .st-emotion-cache-tn0cau {
43
- background: #ffffff !important;
44
-
45
-
46
-
47
- }
48
- div[class*="st-key-ud_main_card"] > div[data-testid="stVerticalBlockBorderWrapper"] > div {
49
- background: #ffffff !important;
50
- border: 1px solid #dbe3ee !important;
51
- border-radius: 16px !important;
52
- padding: 28px 32px 32px 32px !important;
53
- box-shadow: 0 4px 24px rgba(15, 23, 42, 0.08) !important;
54
- }
55
-
56
-
57
- /* Card wrapper like cardref */
58
- .upload-card {
59
- max-width: 960px;
60
- margin: 2.5rem auto;
61
- padding: 2.25rem 2.5rem;
62
- border-radius: 18px;
63
- background: #ffffff;
64
- box-shadow: 0 18px 45px rgba(15, 23, 42, 0.09);
65
- border: 1px solid #e4e7f0;
66
- }
67
-
68
- /* Upload section layout */
69
- .upload-section {
70
- display: flex;
71
- align-items: center; /* vertical alignment */
72
- justify-content: space-between;
73
- gap: 1.5rem;
74
- margin-top: 1.25rem;
75
- }
76
-
77
- .upload-dropzone {
78
- flex: 1;
79
- }
80
-
81
- .upload-button-wrap {
82
- display: flex;
83
- align-items: center;
84
- justify-content: center;
85
- }
86
-
87
- .upload-button-wrap button {
88
- min-width: 160px;
89
- }
90
-
91
- div[class*="st-key-ud_main_card"] [data-testid="stVerticalBlockBorderWrapper"] {
92
- background: #ffffff !important;
93
- border: 1px solid #dbe3ee !important;
94
- border-radius: 16px !important;
95
- box-shadow: 0 4px 24px rgba(15, 23, 42, 0.08) !important;
96
- }
97
- span.st-emotion-cache-epvm6 {
98
- display: flex !important;
99
- justify-content: center !important;
100
- width: 100% !important;
101
- }
102
- .ud-page-title {
103
- color: #111827;
104
- font-size: 2.2rem;
105
- line-height: 1.08;
106
- font-weight: 800;
107
- margin: 0 0 8px 0;
108
- }
109
-
110
- .ud-page-desc {
111
- color: #64748b;
112
- font-size: 1rem;
113
- margin: 0 0 16px 0;
114
- }
115
-
116
- .ud-topbar {
117
- display: flex;
118
- align-items: center;
119
- gap: 10px;
120
- background: #bae1fc;
121
- border: 4px solid #d7e4f2;
122
- border-radius: 20px;
123
- color: #111827;
124
- font-size: 1.05rem;
125
- font-weight: 700;
126
- padding: 12px 14px;
127
- margin-bottom: 7px;
128
- }
129
-
130
- .ud-topbar img {
131
- width: 20px;
132
- height: 20px;
133
- object-fit: contain;
134
- border-radius: 4px;
135
- }
136
-
137
-
138
-
139
- div[class*="st-key-material_ident_card"] [data-testid="stVerticalBlockBorderWrapper"] {
140
- background: transparent !important;
141
- border: 0 !important;
142
- border-radius: 0 !important;
143
- padding: 0 !important;
144
- box-shadow: none !important;
145
- }
146
-
147
- div[class*="st-key-material_form_card"] [data-testid="stVerticalBlockBorderWrapper"] {
148
- background: transparent !important;
149
- border: 0 !important;
150
- border-radius: 0 !important;
151
- padding: 0 !important;
152
- box-shadow: none !important;
153
- }
154
-
155
- .ud-ident-title {
156
- color: #111827;
157
- font-size: 2rem;
158
- font-weight: 800;
159
- margin: 4px 0 8px 2px;
160
- display: flex;
161
- align-items: center;
162
- gap: 8px;
163
- }
164
-
165
- .ud-sec-icon {
166
- width: 18px;
167
- height: 18px;
168
- border-radius: 999px;
169
- background: #2563eb;
170
- color: #ffffff;
171
- display: inline-flex;
172
- align-items: center;
173
- justify-content: center;
174
- font-size: 0.72rem;
175
- font-weight: 700;
176
- line-height: 1;
177
- }
178
-
179
- .ud-upload-title {
180
- color: #111827;
181
- font-size: 1.9rem;
182
- font-weight: 800;
183
- margin: 12px 0 8px 0;
184
- display: flex;
185
- align-items: center;
186
- gap: 8px;
187
- }
188
-
189
- div[class*="st-key-material_ident_card"] label p {
190
- color: #1f2937 !important;
191
- font-size: 0.95rem !important;
192
- font-weight: 600 !important;
193
- }
194
-
195
- div[class*="st-key-material_ident_card"] div[data-baseweb="select"] > div,
196
- div[class*="st-key-material_ident_card"] div[data-baseweb="input"] > div {
197
- min-height: 46px !important;
198
- border-radius: 10px !important;
199
- border: 1px solid #d6dee8 !important;
200
- background: #f8fafc !important;
201
- }
202
-
203
- [data-testid="stFileUploaderDropzone"] {
204
- background: #f8fbff !important;
205
- border: 2px dashed #d4deea !important;
206
- border-radius: 14px !important;
207
- min-height: 230px !important;
208
- padding: 1.4rem !important;
209
- position: relative !important;
210
- display: flex !important;
211
- flex-direction: column !important;
212
- align-items: center !important;
213
- justify-content: center !important;
214
- }
215
-
216
- /* The inner flex column — center everything */
217
- [data-testid="stFileUploaderDropzone"] > div {
218
- display: flex !important;
219
- flex-direction: column !important;
220
- align-items: center !important;
221
- justify-content: center !important;
222
- text-align: center !important;
223
- gap: 10px !important;
224
- width: 100% !important;
225
- }
226
-
227
- /* Browse files button itself */
228
- [data-testid="stFileUploaderDropzone"] button,
229
- [data-testid="stFileUploaderDropzone"] > div button {
230
- background: #2f6fe4 !important;
231
- color: #ffffff !important;
232
- border: 0 !important;
233
- border-radius: 9px !important;
234
- font-weight: 700 !important;
235
- padding: 0.45rem 1.25rem !important;
236
- display: block !important;
237
- margin: 0 auto !important;
238
- }
239
-
240
- /* Streamlit wraps button in a top-level span; center that wrapper */
241
- [data-testid="stFileUploaderDropzone"] > span {
242
- display: flex !important;
243
- justify-content: center !important;
244
- width: 100% !important;
245
- margin-top: 0.5rem !important;
246
- }
247
-
248
- [data-testid="stFileUploaderDropzone"] [data-testid="stFileUploaderDropzoneInstructions"] {
249
- width: 100% !important;
250
- display: flex !important;
251
- flex-direction: column !important;
252
- align-items: center !important;
253
- justify-content: center !important;
254
- text-align: center !important;
255
- }
256
-
257
- /* The "Limit 200MB" small text */
258
- [data-testid="stFileUploaderDropzone"] small {
259
- font-size: 0.96rem !important;
260
- text-align: center !important;
261
- display: block !important;
262
- }
263
-
264
- /* Cloud icon / drag text paragraph */
265
- [data-testid="stFileUploaderDropzone"] p,
266
- [data-testid="stFileUploaderDropzone"] div > p {
267
- text-align: center !important;
268
- width: 100% !important;
269
- }
270
- </style>
271
- """,
272
- unsafe_allow_html=True,
273
- )
274
-
275
-
276
- def render_top_bar():
277
- logo_html = ""
278
- try:
279
- with open("logo.png", "rb") as fh:
280
- logo_b64 = base64.b64encode(fh.read()).decode()
281
- logo_html = f"<img src='data:image/png;base64,{logo_b64}' alt='AIM'/>"
282
- except Exception:
283
- logo_html = ""
284
-
285
- st.markdown(
286
- f"<div class='ud-topbar'>{logo_html}<span>AIM Composites</span></div>",
287
- unsafe_allow_html=True,
288
- )
289
-
290
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  def input_form():
292
- property_categories = {
293
- "Polymer": [
294
- "Thermal",
295
- "Mechanical",
296
- "Processing",
297
- "Physical",
298
- "Descriptive",
299
- ],
300
- "Fiber": [
301
- "Mechanical",
302
- "Physical",
303
- "Thermal",
304
- "Descriptive",
305
- ],
306
- "Composite": [
307
- "Mechanical",
308
- "Thermal",
309
- "Processing",
310
- "Physical",
311
- "Descriptive",
312
- "Composition / Reinforcement",
313
- "Architecture / Structure",
314
- ],
315
- }
316
-
317
- property_names = {
318
- "Polymer": {
319
- "Thermal": [
320
- "Glass transition temperature (Tg)",
321
- "Melting temperature (Tm)",
322
- "Crystallization temperature (Tc)",
323
- "Degree of crystallinity",
324
- "Decomposition temperature",
325
- ],
326
- "Mechanical": [
327
- "Tensile modulus",
328
- "Tensile strength",
329
- "Elongation at break",
330
- "Flexural modulus",
331
- "Impact strength",
332
- ],
333
- "Processing": [
334
- "Melt flow index (MFI)",
335
- "Processing temperature",
336
- "Cooling rate",
337
- "Mold shrinkage",
338
- ],
339
- "Physical": [
340
- "Density",
341
- "Specific gravity",
342
- ],
343
- "Descriptive": [
344
- "Material grade",
345
- "Manufacturer",
346
- ],
347
- },
348
- "Fiber": {
349
- "Mechanical": [
350
- "Tensile modulus",
351
- "Tensile strength",
352
- "Strain to failure",
353
- ],
354
- "Physical": [
355
- "Density",
356
- "Fiber diameter",
357
- ],
358
- "Thermal": [
359
- "Decomposition temperature",
360
- ],
361
- "Descriptive": [
362
- "Fiber type",
363
- "Surface treatment",
364
- ],
365
- },
366
- "Composite": {
367
- "Mechanical": [
368
- "Longitudinal modulus (E1)",
369
- "Transverse modulus (E2)",
370
- "Shear modulus (G12)",
371
- "Poissons ratio (V12)",
372
- "Tensile strength (fiber direction)",
373
- "Interlaminar shear strength",
374
- ],
375
- "Thermal": [
376
- "Glass transition temperature (matrix)",
377
- "Coefficient of thermal expansion (CTE)",
378
- ],
379
- "Processing": [
380
- "Curing temperature",
381
- "Curing pressure",
382
- ],
383
- "Physical": [
384
- "Density",
385
- ],
386
- "Descriptive": [
387
- "Laminate type",
388
- ],
389
- "Composition / Reinforcement": [
390
- "Fiber volume fraction",
391
- "Fiber weight fraction",
392
- "Fiber type",
393
- "Matrix type",
394
- ],
395
- "Architecture / Structure": [
396
- "Weave type",
397
- "Ply orientation",
398
- "Number of plies",
399
- "Stacking sequence",
400
- ],
401
- },
402
- }
403
-
404
- with st.container(border=False, key="material_ident_card"):
405
- st.markdown("<div class='ud-ident-title'><span class='ud-sec-icon'>i</span>Material Identification</div>", unsafe_allow_html=True)
406
-
407
- col_a, col_b = st.columns(2)
408
- with col_a:
409
- material_class = st.selectbox(
410
- "Material Class",
411
- ("Polymer", "Fiber", "Composite"),
412
- index=None,
413
- placeholder="Choose material class",
414
- key="manual_material_class",
415
- )
416
- with col_b:
417
- if material_class:
418
- property_category = st.selectbox(
419
- "Property Type",
420
- property_categories[material_class],
421
- index=None,
422
- placeholder="Choose property type",
423
- key="manual_property_category",
424
- )
425
- else:
426
- property_category = None
427
- st.selectbox(
428
- "Property Type",
429
- ["Choose material class first"],
430
- index=0,
431
- disabled=True,
432
- key="manual_property_category_disabled",
433
- )
434
-
435
- if material_class and property_category:
436
- property_options = property_names[material_class][property_category] + ["Something else"]
437
- property_name = st.selectbox(
438
- "Property Name",
439
- property_options,
440
- index=None,
441
- placeholder="Choose property",
442
- key="manual_property_name",
443
- )
444
- else:
445
- property_name = None
446
-
447
- custom_property_name = ""
448
- if property_name == "Something else":
449
- custom_property_name = st.text_input(
450
- "Custom Property Name",
451
- placeholder="Type property name",
452
- key="manual_custom_property_name",
453
- ).strip()
454
-
455
- selected_property_name = (
456
- custom_property_name if property_name == "Something else" else property_name
457
- )
458
-
459
- if material_class and property_category and selected_property_name:
460
- with st.container(border=False, key="material_form_card"):
461
- with st.form("user_input"):
462
- st.subheader("Enter Data")
463
-
464
- material_name = st.text_input("Material Name")
465
- material_abbr = st.text_input("Material Abbreviation")
466
-
467
- value = st.text_input("Value")
468
- unit = st.text_input("Unit (SI)")
469
- english = st.text_input("English Units")
470
- test_condition = st.text_input("Test Condition")
471
- comments = st.text_area("Comments")
472
-
473
- submitted = st.form_submit_button("Submit")
474
 
475
  if submitted:
476
  if not (material_name and value):
477
  st.error("Material name and value are required.")
478
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
  else:
480
- input_db = pd.DataFrame(
481
- [
482
- {
483
- "material_class": material_class,
484
- "material_name": material_name,
485
- "material_abbreviation": material_abbr,
486
- "section": property_category,
487
- "property_name": selected_property_name,
488
- "value": value,
489
- "unit": unit,
490
- "english": english,
491
- "test_condition": test_condition,
492
- "comments": comments,
493
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  )
 
 
 
 
 
496
 
497
- try:
498
- inserted = insert_material_rows(input_db)
499
- except Exception as exc:
500
- st.error(f"Failed to save to PostgreSQL: {exc}")
501
- return False
502
 
503
- if inserted <= 0:
504
- st.error("No rows were inserted into PostgreSQL.")
505
- return False
 
 
 
 
 
 
506
 
507
- st.cache_data.clear()
508
- st.success("Property added successfully to PostgreSQL.")
509
- st.dataframe(input_db)
510
- return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
- return False
513
 
514
- return False
515
-
516
-
517
- def main():
518
- inject_upload_page_styles()
519
- render_top_bar()
520
-
521
-
522
- st.subheader("Submit Scientific Material")
523
- st.caption("Provide technical data and research documentation for the central repository.")
524
-
525
-
526
- if "image_results" not in st.session_state:
527
- st.session_state.image_results = []
528
- if "pdf_processed" not in st.session_state:
529
- st.session_state.pdf_processed = False
530
- if "current_pdf_name" not in st.session_state:
531
- st.session_state.current_pdf_name = None
532
- if "form_submitted" not in st.session_state:
533
- st.session_state.form_submitted = False
534
- if "pdf_data_extracted" not in st.session_state:
535
- st.session_state.pdf_data_extracted = False
536
- if "pdf_extracted_df" not in st.session_state:
537
- st.session_state.pdf_extracted_df = pd.DataFrame()
538
- if "saved_image_mapping" not in st.session_state:
539
- st.session_state.saved_image_mapping = {}
540
-
541
-
542
  with st.container(border=True, key="ud_main_card"):
543
  if input_form():
544
  st.session_state.form_submitted = True
545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
 
547
- st.markdown("<div class='ud-upload-title'><span class='ud-sec-icon'>i</span>Research Documentation</div>", unsafe_allow_html=True)
548
-
549
- uploaded_file = st.file_uploader(
550
- "Upload PDF (Material Datasheet or Research Paper)", type=["pdf"]
551
- )
552
-
553
-
554
- if not uploaded_file:
555
- st.info("Upload a PDF to extract material data and plots")
556
-
557
- if not uploaded_file:
558
- st.session_state.pdf_processed = False
559
- st.session_state.current_pdf_name = None
560
- st.session_state.image_results = []
561
- st.session_state.form_submitted = False
562
- st.session_state.pdf_data_extracted = False
563
- st.session_state.pdf_extracted_df = pd.DataFrame()
564
- st.session_state.saved_image_mapping = {}
565
- return
566
-
567
- paper_id = os.path.splitext(uploaded_file.name)[0].replace(" ", "_")
568
-
569
- if st.session_state.current_pdf_name != uploaded_file.name:
570
- st.session_state.pdf_processed = False
571
- st.session_state.current_pdf_name = uploaded_file.name
572
- st.session_state.image_results = []
573
- st.session_state.form_submitted = False
574
- st.session_state.saved_image_mapping = {}
575
-
576
- if st.session_state.form_submitted:
577
- st.session_state.form_submitted = False
578
- st.info(
579
- "A Form was submitted. But your previous extracted data has been added already. "
580
- "If you want to extract more data/plots upload again"
581
- )
582
- tab1, tab2 = st.tabs(["Material Data", "Extracted Plots"])
583
- with tab1:
584
- st.info("Material data from form has been added to database.")
585
- with tab2:
586
- st.info("Plots already extracted")
587
- return
588
-
589
- tab1, tab2 = st.tabs([" Material Data", " Extracted Plots"])
590
-
591
- with tempfile.TemporaryDirectory() as tmpdir:
592
- pdf_path = os.path.join(tmpdir, uploaded_file.name)
593
- with open(pdf_path, "wb") as f:
594
- f.write(uploaded_file.getbuffer())
595
-
596
- with tab1:
597
- st.subheader("Material Properties Data")
598
-
599
- if not st.session_state.pdf_data_extracted:
600
- with st.spinner(" Extracting material data..."):
601
- with open(pdf_path, "rb") as f:
602
- pdf_bytes = f.read()
603
-
604
- data = call_gemini_from_bytes(pdf_bytes, uploaded_file.name)
605
-
606
- if data:
607
- df = convert_to_dataframe(data)
608
- if not df.empty:
609
- st.session_state.pdf_extracted_df = df
610
- st.session_state.pdf_data_extracted = True
611
- st.session_state.pdf_extracted_meta = data
612
- else:
613
- st.warning("No data extracted")
614
- else:
615
- st.error("Failed to extract data from PDF")
616
-
617
- df = st.session_state.pdf_extracted_df
618
-
619
- if not df.empty:
620
- data = st.session_state.get("pdf_extracted_meta", {})
621
- st.success(f"Extracted {len(df)} properties")
622
-
623
- col1, col2 = st.columns(2)
624
- with col1:
625
- st.metric("Material", data.get("material_name", "N/A"))
626
- with col2:
627
- st.metric("Abbreviation", data.get("material_abbreviation", "N/A"))
628
-
629
- st.dataframe(df, use_container_width=True, height=400)
630
- st.subheader("Assign Material Category")
631
-
632
- extracted_material_class = st.selectbox(
633
- "Select category for this material",
634
- ["Polymer", "Fiber", "Composite"],
635
- index=None,
636
- placeholder="Required before adding to database",
637
- )
638
-
639
- if st.button("+Add to Database"):
640
- if not extracted_material_class:
641
- st.error("Please select a material category before adding.")
642
- else:
643
- df["material_class"] = extracted_material_class
644
- df["material_type"] = extracted_material_class
645
-
646
- if st.session_state.image_results:
647
- with st.spinner("Saving matched plot images..."):
648
- saved_images = save_matched_images(
649
- df,
650
- st.session_state.image_results,
651
- save_dir="images",
652
- )
653
-
654
- if saved_images:
655
- st.success(f" Saved {len(saved_images)} plot image(s)")
656
- with st.expander("View saved images"):
657
- for img_info in saved_images:
658
- st.write(
659
- f"? **{img_info['property']}** ? {img_info['caption']}"
660
- )
661
- st.write(f" Saved to: `{img_info['path']}`")
662
- else:
663
- st.info("? No plots matched the extracted properties")
664
-
665
- if "user_uploaded_data" not in st.session_state:
666
- st.session_state["user_uploaded_data"] = df
667
- else:
668
- st.session_state["user_uploaded_data"] = pd.concat(
669
- [st.session_state["user_uploaded_data"], df],
670
- ignore_index=True,
671
- )
672
-
673
- st.success(f"Added to {extracted_material_class} database!")
674
-
675
- with tab2:
676
- st.subheader("Extracted Plot Images")
677
-
678
- if not st.session_state.pdf_processed:
679
- with st.spinner(" Extracting plots from PDF..."):
680
- doc = fitz.open(pdf_path)
681
- st.session_state.image_results = extract_images(doc)
682
- doc.close()
683
- st.session_state.pdf_processed = True
684
-
685
- if st.session_state.image_results:
686
- has_extracted_data = not st.session_state.pdf_extracted_df.empty
687
-
688
- if has_extracted_data:
689
- mat_abbr = st.session_state.pdf_extracted_df.iloc[0][
690
- "material_abbreviation"
691
- ]
692
- property_list = (
693
- st.session_state.pdf_extracted_df["property_name"].unique().tolist()
694
- )
695
-
696
- st.info(
697
- f" Material: **{mat_abbr}** | {len(property_list)} properties available for mapping"
698
- )
699
- else:
700
- st.warning(
701
- " No extracted material data found. Please extract material data first (Tab 1) to enable property mapping."
702
- )
703
-
704
- subtab1, subtab2 = st.tabs([" Images", "JSON Preview"])
705
-
706
- with subtab1:
707
- st.success(
708
- f"Extracted {len(st.session_state.image_results)} plots"
709
- )
710
-
711
- col_img, col_json, col_all = st.columns(3)
712
-
713
- with col_img:
714
- img_zip = create_zip(st.session_state.image_results, include_json=False)
715
- st.download_button(
716
- " Download Images Only",
717
- data=img_zip,
718
- file_name=f"{paper_id}_images.zip",
719
- mime="application/zip",
720
- use_container_width=True,
721
- key="download_images",
722
- )
723
-
724
- with col_json:
725
- json_data = [
726
- {
727
- "caption": r["caption"],
728
- "page": r["page"],
729
- "image_count": len(r["image_data"]),
730
- }
731
- for r in st.session_state.image_results
732
- ]
733
- st.download_button(
734
- " Download JSON",
735
- data=json.dumps(json_data, indent=4),
736
- file_name=f"{paper_id}_metadata.json",
737
- mime="application/json",
738
- use_container_width=True,
739
- key="download_json_top",
740
- )
741
-
742
- with col_all:
743
- full_zip = create_zip(st.session_state.image_results, include_json=True)
744
- st.download_button(
745
- " Download All",
746
- data=full_zip,
747
- file_name=f"{paper_id}_complete.zip",
748
- mime="application/zip",
749
- use_container_width=True,
750
- key="download_all",
751
- )
752
-
753
- st.divider()
754
-
755
- if st.session_state.saved_image_mapping:
756
- with st.expander(" Saved Image Mappings", expanded=False):
757
- for img_key, mapping_info in st.session_state.saved_image_mapping.items():
758
- st.write(
759
- f" **{mapping_info['caption']}** ? `{mapping_info['property']}`"
760
- )
761
- st.write(
762
- f" Saved as: `{mapping_info['filename']}`"
763
- )
764
- st.divider()
765
-
766
- results_copy = st.session_state.image_results.copy()
767
-
768
- for idx in range(len(results_copy)):
769
- if idx >= len(st.session_state.image_results):
770
- break
771
-
772
- result = st.session_state.image_results[idx]
773
-
774
- with st.container(border=True):
775
- col_cap, col_btn = st.columns([0.85, 0.15])
776
- col_cap.markdown(
777
- f"**Page {result['page']}** - {result['caption']}"
778
- )
779
-
780
- if col_btn.button("Delete", key=f"del_g_{idx}_{result['page']}"):
781
- del st.session_state.image_results[idx]
782
- st.rerun()
783
-
784
- image_data_list = result["image_data"]
785
- if image_data_list and len(image_data_list) > 0:
786
- for p_idx in range(len(image_data_list)):
787
- if p_idx >= len(st.session_state.image_results[idx]["image_data"]):
788
- break
789
-
790
- img_data = st.session_state.image_results[idx]["image_data"][p_idx]
791
- img_unique_key = f"{idx}_{p_idx}_{result['page']}"
792
-
793
- st.image(img_data["array"], width=300, channels="BGR")
794
-
795
- if has_extracted_data:
796
- col_dropdown, col_add_btn, col_remove = st.columns(
797
- [0.6, 0.2, 0.2]
798
- )
799
-
800
- with col_dropdown:
801
- selected_property = st.selectbox(
802
- "Select Property",
803
- options=["-- Select --"] + property_list,
804
- key=f"prop_select_{img_unique_key}",
805
- label_visibility="collapsed",
806
- )
807
-
808
- with col_add_btn:
809
- if st.button(" Add", key=f"add_btn_{img_unique_key}"):
810
- if selected_property and selected_property != "-- Select --":
811
- filepath = save_single_image_with_property(
812
- img_data["array"],
813
- mat_abbr,
814
- selected_property,
815
- save_dir="images",
816
- )
817
-
818
- st.session_state.saved_image_mapping[
819
- img_unique_key
820
- ] = {
821
- "property": selected_property,
822
- "caption": result["caption"],
823
- "filename": os.path.basename(filepath),
824
- "path": filepath,
825
- }
826
-
827
- st.success(
828
- f" Saved as `{mat_abbr}_{selected_property}.png`"
829
- )
830
- st.rerun()
831
- else:
832
- st.warning("Please select a property first")
833
-
834
- with col_remove:
835
- if st.button("Remove", key=f"del_s_{img_unique_key}"):
836
- if img_unique_key in st.session_state.saved_image_mapping:
837
- del st.session_state.saved_image_mapping[img_unique_key]
838
-
839
- del st.session_state.image_results[idx]["image_data"][p_idx]
840
- if len(st.session_state.image_results[idx]["image_data"]) == 0:
841
- del st.session_state.image_results[idx]
842
- st.rerun()
843
-
844
- if img_unique_key in st.session_state.saved_image_mapping:
845
- mapping = st.session_state.saved_image_mapping[img_unique_key]
846
- st.info(f"Mapped to: **{mapping['property']}**")
847
- else:
848
- col_info, col_remove = st.columns([0.8, 0.2])
849
- with col_info:
850
- st.caption(
851
- "Extract material data first to enable property mapping"
852
- )
853
- with col_remove:
854
- if st.button("Remove", key=f"del_s_{img_unique_key}"):
855
- del st.session_state.image_results[idx]["image_data"][p_idx]
856
- if len(st.session_state.image_results[idx]["image_data"]) == 0:
857
- del st.session_state.image_results[idx]
858
- st.rerun()
859
-
860
- st.divider()
861
-
862
- with subtab2:
863
- st.subheader("Metadata Preview")
864
- json_data = [
865
- {
866
- "caption": r["caption"],
867
- "page": r["page"],
868
- "image_count": len(r["image_data"]),
869
- "images": [img["filename"] for img in r["image_data"]],
870
- }
871
- for r in st.session_state.image_results
872
- ]
873
-
874
- st.download_button(
875
- " Download JSON",
876
- data=json.dumps(json_data, indent=4),
877
- file_name=f"{paper_id}_metadata.json",
878
- mime="application/json",
879
- key="download_json_bottom",
880
- )
881
-
882
- st.json(json_data)
883
- else:
884
- st.warning("No plots found in PDF")
885
-
886
-
887
- main()
 
1
+ import logging
2
+ import sys
3
+ import os
4
+
5
+ log = logging.getLogger(__name__)
6
+
7
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
8
+ sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
9
+
10
+ import io
11
+ import json
12
+ import tempfile
13
+ import base64
14
+ import zipfile
15
+ import re
16
+ from io import BytesIO
17
+ import time
18
+ import cv2
19
  import fitz # PyMuPDF
20
+ import numpy as np
21
  import pandas as pd
22
+ import requests
23
  import streamlit as st
24
+ from PIL import Image
25
+
26
+
27
+ from dotenv import load_dotenv
28
+ load_dotenv()
29
+
30
+ _GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
31
+ if not _GEMINI_API_KEY:
32
+ raise RuntimeError("GEMINI_API_KEY not set in environment")
33
+
34
+ # ── imports from doctodb_rag (data extraction) ────────────────────────────────
35
+ from categorized.Backend.PDF_DataExtraction import run_pipeline
36
+
37
+ # ── imports from figure_extractor (image extraction) ─────────────────────────
38
+ from categorized.Backend.Pdf_ImageExtraction import (
39
+ GEMINI_MODEL as GEMINI_MODEL,
40
+ get_plot_data_from_llm,
41
+ extract_plots,
42
+ )
43
 
44
  from data_loader import insert_material_rows
45
+ from categorized.Backend.plot_property_mapper import (
46
+ batch_map_plots,
47
+ fetch_properties_for_material,
48
+ save_plot_image_mapping,
49
+ save_plot_image_to_db,
50
+ )
51
+ from db import fetch_all
52
+
53
+
54
+ # ─────────────────────────────────────────────────────────────────────────────
55
+ # Helpers that were previously in upload_backend
56
+ # ─────────────────────────────────────────────────────────────────────────────
57
+
58
def _df_to_meta(df: pd.DataFrame) -> dict:
    """Re-create the flat metadata dict that the UI previously got from Gemini.

    The identifying fields are taken from the first row of ``df``; every row
    is additionally exported as a record under ``"mechanical_properties"``.

    Args:
        df: Extracted property rows, one row per property.

    Returns:
        ``{}`` when ``df`` is empty. Otherwise a dict with the string fields
        ``material_name``, ``material_abbreviation``, ``trade_grade`` and
        ``manufacturer`` plus ``mechanical_properties`` (list of row dicts).
        A missing column or a null cell yields ``""`` for that field.
    """
    if df.empty:
        return {}

    first_row = df.iloc[0]

    def _text(column: str) -> str:
        # Null cells (None / NaN / NA) must not be stringified: str() would
        # turn them into the literal text "None" or "nan".
        value = first_row.get(column, "")
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        return str(value)

    return {
        "material_name": _text("material_name"),
        "material_abbreviation": _text("material_abbreviation"),
        "trade_grade": _text("trade_grade"),
        "manufacturer": _text("manufacturer"),
        "mechanical_properties": df.to_dict(orient="records"),
    }
71
+
72
+
73
def create_zip(image_results: list, include_json: bool = True) -> bytes:
    """Bundle extracted plot images, and optionally their metadata, into a ZIP.

    Args:
        image_results: Items with ``caption``, ``page`` and ``image_data``;
            ``image_data`` is a list of dicts holding ``array`` (BGR ndarray)
            and ``filename``.
        include_json: When true, a ``metadata.json`` entry describing every
            item is added to the archive (only if there is at least one item).

    Returns:
        The raw bytes of the deflate-compressed ZIP archive.
    """
    archive = io.BytesIO()
    manifest = []

    with zipfile.ZipFile(archive, "w", zipfile.ZIP_DEFLATED) as bundle:
        for entry in image_results:
            for image in entry.get("image_data", []):
                pixels = image.get("array")
                if pixels is None:
                    continue
                # Entries that fail PNG encoding are left out of the archive.
                encoded_ok, png = cv2.imencode(".png", pixels)
                if encoded_ok:
                    bundle.writestr(image.get("filename", "plot.png"), png.tobytes())

            if not include_json:
                continue
            images = entry.get("image_data", [])
            manifest.append(
                {
                    "caption": entry.get("caption", ""),
                    "page": entry.get("page", "?"),
                    "image_count": len(images),
                    "images": [image.get("filename") for image in images],
                }
            )

        # The manifest is only ever populated when include_json is set.
        if manifest:
            bundle.writestr("metadata.json", json.dumps(manifest, indent=4))

    return archive.getvalue()
102
+
103
+
104
def save_matched_images(
    df: pd.DataFrame,
    image_results: list,
    save_dir: str = "images",
) -> list:
    """Match plot captions to property names and save the matched images.

    A caption is matched to the property whose lower-cased name shares the
    most words with it (first property wins on ties); captions sharing no
    word with any property are skipped.

    Args:
        df: Extracted properties; the ``property_name`` column is used when
            present. Null cells in that column are ignored.
        image_results: Items with ``caption`` and ``image_data``, where each
            ``image_data`` entry carries a BGR ndarray under ``array``.
        save_dir: Directory the PNG files are written to (created if needed).

    Returns:
        One dict per image actually written, with the keys ``property``
        (lower-cased property name), ``caption`` and ``path``.
    """
    os.makedirs(save_dir, exist_ok=True)
    saved = []

    if "property_name" in df.columns:
        # Drop null cells first: they are not strings and cannot be tokenised.
        names = df["property_name"].dropna().astype(str).str.lower()
        props = list(dict.fromkeys(names))
    else:
        props = []

    # Tokenise every property once instead of once per caption.
    prop_words = {prop: set(re.findall(r"\w+", prop)) for prop in props}

    # Image slots already consumed per property, so that two figures matched
    # to the same property never write to the same file name.
    used_slots = {}

    for item in image_results:
        caption = (item.get("caption") or "").lower()
        cap_words = set(re.findall(r"\w+", caption))

        best_prop = None
        best_score = 0
        for prop, words in prop_words.items():
            score = len(cap_words & words)
            if score > best_score:
                best_score = score
                best_prop = prop

        if best_prop is None:
            continue

        images = item.get("image_data", [])
        offset = used_slots.get(best_prop, 0)
        used_slots[best_prop] = offset + len(images)
        safe_prop = re.sub(r"[^\w\-]", "_", best_prop)

        for idx, img_dict in enumerate(images):
            bgr = img_dict.get("array")
            if bgr is None:
                continue
            filepath = os.path.join(save_dir, f"{safe_prop}_{offset + idx}.png")
            # cv2.imwrite signals failure through its return value; do not
            # report an image as saved when nothing was written.
            if not cv2.imwrite(filepath, bgr):
                logging.getLogger(__name__).warning(
                    "Could not write plot image to %s", filepath
                )
                continue
            saved.append({
                "property": best_prop,
                "caption": item.get("caption", ""),
                "path": filepath,
            })
    return saved
145
+
146
+
147
def save_single_image_with_property(
    bgr: np.ndarray,
    property_name: str,
    save_dir: str = "images",
) -> str:
    """
    Write one BGR image into ``save_dir`` under a name derived from
    ``property_name`` and return the resulting file path.

    Every character of the property name that is not a word character or a
    hyphen is replaced by an underscore; the file is always a ``.png``.
    The target directory is created when it does not exist yet.
    """
    os.makedirs(save_dir, exist_ok=True)
    stem = re.sub(r"[^\w\-]", "_", property_name)
    target = os.path.join(save_dir, stem + ".png")
    cv2.imwrite(target, bgr)
    return target
158
+
159
+
160
+ # ─────────────────────────────────────────────────────────────────────────────
161
+ # extract_images adapter
162
+ # Bridges figure_extractor's extract_plots API to the image_results list shape
163
+ # expected by the rest of the UI (list of {caption, page, image_data}).
164
+ # ─────────────────────────────────────────────────────────────────────────────
165
+
166
+ _GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyBzyMFKEqcjsWpR-OGAY42T250o1O39v3Y")
167
+
168
def extract_images(pdf_path: str) -> list:
    """
    Use figure_extractor to detect and crop plot images from a PDF path.

    Returns a list compatible with the image_results shape used throughout the UI:
    [{ "caption": str, "page": int, "image_data": [{"array": bgr_ndarray, "filename": str}] }]

    Any failure while querying the model or cropping the plots is logged and
    results in an empty list, so the UI can simply report "no plots found".
    """
    try:
        plot_data = get_plot_data_from_llm(GEMINI_MODEL, pdf_path)
        raw_plots = extract_plots(
            pdf_path=pdf_path,
            plot_data=plot_data,
            pad=22,
            score_thresh=0.35,
        )
    except Exception as e:
        log.error(f"extract_images failed: {e}")
        return []

    # raw_plots items: {caption, page, path, plot_score, plot_type}
    # Convert to image_results shape
    image_results = []
    used_names = set()
    for item in raw_plots or []:
        path = item.get("path")
        bgr = cv2.imread(path) if path else None

        # Clean up the temp file written by extract_plots. Best effort: a
        # file that is already gone or cannot be removed must not abort the
        # extraction.
        if path:
            try:
                os.remove(path)
            except OSError:
                pass

        page = item.get("page", 1)
        # Fall back to a generic caption when the key is missing OR empty/None,
        # otherwise re.sub below would receive a non-string.
        caption = item.get("caption") or f"Figure (page {page})"
        safe = re.sub(r"[^\w\-]", "_", str(caption))[:40]

        # Two plots on one page can share the first 40 caption characters;
        # keep archive member names unique so neither is lost in a ZIP.
        filename = f"page{page}_{safe}.png"
        suffix = 1
        while filename in used_names:
            suffix += 1
            filename = f"page{page}_{safe}_{suffix}.png"
        used_names.add(filename)

        image_results.append({
            "caption": caption,
            "page": page,
            "image_data": [{"array": bgr, "filename": filename}] if bgr is not None else [],
        })

    return image_results
214
+
215
+
216
+ # ─────────────────────────────────────────────────────────────────────────────
217
+ # Styles
218
+ # ─────────────────────────────────────────────────────────────────────────────
219
+
220
def inject_upload_page_styles():
    """
    Inject the upload page's CSS into the Streamlit app.

    Emits a single ``<style>`` element through ``st.markdown`` with
    ``unsafe_allow_html=True``. The rules hide the Streamlit header, set the
    page background and font, style the keyed containers (``ud_main_card``,
    ``material_ident_card``, ``material_form_card``), the file-uploader
    dropzone, and the custom ``ud-*``, ``conf-badge`` and ``plot-card-meta``
    classes used by the markup rendered elsewhere on this page.
    """
    # NOTE(review): the ``st-emotion-cache-*`` selectors below look like
    # auto-generated class names and may stop matching after a Streamlit
    # upgrade - confirm against the deployed version.
    st.markdown(
        """
        <style>
        @import url("https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;600;700;800&display=swap");

        [data-testid="stHeader"] { display: none !important; }
        .stApp { background: #f3f6fb !important; }
        html, body, [class*="css"] { font-family: "DM Sans", sans-serif !important; }

        .block-container {
            max-width: 980px !important;
            padding-top: 1rem !important;
            padding-bottom: 2rem !important;
        }

        .st-emotion-cache-tn0cau { background: #ffffff !important; }

        div[class*="st-key-ud_main_card"] > div[data-testid="stVerticalBlockBorderWrapper"] > div {
            background: #ffffff !important;
            border: 1px solid #dbe3ee !important;
            border-radius: 16px !important;
            padding: 28px 32px 32px 32px !important;
            box-shadow: 0 4px 24px rgba(15, 23, 42, 0.08) !important;
        }

        div[class*="st-key-ud_main_card"] [data-testid="stVerticalBlockBorderWrapper"] {
            background: #ffffff !important;
            border: 1px solid #dbe3ee !important;
            border-radius: 16px !important;
            box-shadow: 0 4px 24px rgba(15, 23, 42, 0.08) !important;
        }

        span.st-emotion-cache-epvm6 {
            display: flex !important;
            justify-content: center !important;
            width: 100% !important;
        }

        div[class*="st-key-material_ident_card"] [data-testid="stVerticalBlockBorderWrapper"],
        div[class*="st-key-material_form_card"] [data-testid="stVerticalBlockBorderWrapper"] {
            background: transparent !important;
            border: 0 !important;
            border-radius: 0 !important;
            padding: 0 !important;
            box-shadow: none !important;
        }

        div[class*="st-key-material_ident_card"] label p {
            color: #1f2937 !important;
            font-size: 0.95rem !important;
            font-weight: 600 !important;
        }

        div[class*="st-key-material_ident_card"] div[data-baseweb="select"] > div,
        div[class*="st-key-material_ident_card"] div[data-baseweb="input"] > div {
            min-height: 46px !important;
            border-radius: 10px !important;
            border: 1px solid #d6dee8 !important;
            background: #f8fafc !important;
        }

        [data-testid="stFileUploaderDropzone"] {
            background: #f8fbff !important;
            border: 2px dashed #d4deea !important;
            border-radius: 14px !important;
            min-height: 230px !important;
            padding: 1.4rem !important;
            position: relative !important;
            display: flex !important;
            flex-direction: column !important;
            align-items: center !important;
            justify-content: center !important;
        }

        [data-testid="stFileUploaderDropzone"] > div {
            display: flex !important;
            flex-direction: column !important;
            align-items: center !important;
            justify-content: center !important;
            text-align: center !important;
            gap: 10px !important;
            width: 100% !important;
        }

        [data-testid="stFileUploaderDropzone"] button,
        [data-testid="stFileUploaderDropzone"] > div button {
            background: #2f6fe4 !important;
            color: #ffffff !important;
            border: 0 !important;
            border-radius: 9px !important;
            font-weight: 700 !important;
            padding: 0.45rem 1.25rem !important;
            display: block !important;
            margin: 0 auto !important;
        }

        [data-testid="stFileUploaderDropzone"] > span {
            display: flex !important;
            justify-content: center !important;
            width: 100% !important;
            margin-top: 0.5rem !important;
        }

        [data-testid="stFileUploaderDropzone"] [data-testid="stFileUploaderDropzoneInstructions"] {
            width: 100% !important;
            display: flex !important;
            flex-direction: column !important;
            align-items: center !important;
            justify-content: center !important;
            text-align: center !important;
        }

        [data-testid="stFileUploaderDropzone"] small {
            font-size: 0.96rem !important;
            text-align: center !important;
            display: block !important;
        }

        [data-testid="stFileUploaderDropzone"] p,
        [data-testid="stFileUploaderDropzone"] div > p {
            text-align: center !important;
            width: 100% !important;
        }

        .ud-topbar {
            display: flex;
            align-items: center;
            gap: 10px;
            background: #bae1fc;
            border: 4px solid #d7e4f2;
            border-radius: 20px;
            color: #111827;
            font-size: 1.05rem;
            font-weight: 700;
            padding: 12px 14px;
            margin-bottom: 7px;
        }

        .ud-topbar img { width: 20px; height: 20px; object-fit: contain; border-radius: 4px; }

        .ud-ident-title {
            color: #111827; font-size: 2rem; font-weight: 800;
            margin: 4px 0 8px 2px; display: flex; align-items: center; gap: 8px;
        }

        .ud-upload-title {
            color: #111827; font-size: 1.9rem; font-weight: 800;
            margin: 12px 0 8px 0; display: flex; align-items: center; gap: 8px;
        }

        .ud-sec-icon {
            width: 18px; height: 18px; border-radius: 999px;
            background: #2563eb; color: #ffffff; display: inline-flex;
            align-items: center; justify-content: center;
            font-size: 0.72rem; font-weight: 700; line-height: 1;
        }

        .conf-badge {
            display: inline-block;
            padding: 2px 10px;
            border-radius: 99px;
            font-size: 0.78rem;
            font-weight: 700;
            color: #fff;
        }

        .plot-card-meta {
            font-size: 0.82rem;
            color: #64748b;
            margin-bottom: 4px;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )
396
+
397
+
398
def render_top_bar():
    """
    Render the "AIM Composites" top bar.

    The logo is read from ``logo.png`` (relative to the working directory) and
    inlined as a base64 data URI. The logo is optional: when the file cannot
    be opened or read, the bar is rendered with the title text only.
    """
    logo_html = ""
    try:
        with open("logo.png", "rb") as fh:
            logo_b64 = base64.b64encode(fh.read()).decode()
    except OSError:
        # Missing/unreadable logo is expected in some deployments; only file
        # errors are tolerated here so real programming errors still surface.
        pass
    else:
        logo_html = f"<img src='data:image/png;base64,{logo_b64}' alt='AIM'/>"
    st.markdown(
        f"<div class='ud-topbar'>{logo_html}<span>AIM Composites</span></div>",
        unsafe_allow_html=True,
    )
410
+
411
+
412
+ # ─────────────────────────────────────────────────────────────────────────────
413
+ # Helpers for tab2 mapping UI
414
+ # ─────────────────────────────────────────────────────────────────────────────
415
+
416
+ def _confidence_badge(conf: str) -> str:
417
+ colors = {"high": "#16a34a", "medium": "#d97706", "low": "#dc2626"}
418
+ c = colors.get((conf or "low").lower(), "#6b7280")
419
+ return (
420
+ f"<span class='conf-badge' style='background:{c}'>"
421
+ f"{conf.upper()}</span>"
422
+ )
423
+
424
+
425
+ # ─────────────────────────────────────────────────────────────────────────────
426
+ # Manual input form
427
+ # ─────────────────────────────────────────────────────────────────────────────
428
+
429
def input_form() -> bool:
    """
    Render the manual data-entry UI and save one property row on submit.

    The user first picks a material class, then a property type offered for
    that class, then a property name (or "Something else" plus free text).
    Once all three are chosen, a form asks for the material name, value,
    units, test condition and comments.

    Returns:
        True only when the form was submitted and ``insert_material_rows``
        reported at least one inserted row. False in every other case:
        selections incomplete, form not submitted, required fields missing,
        or the insert raised / inserted nothing.
    """
    # Property types offered for each material class.
    property_categories = {
        "Polymer": ["Thermal", "Mechanical", "Processing", "Physical", "Descriptive"],
        "Fiber": ["Mechanical", "Physical", "Thermal", "Descriptive"],
        "Composite": [
            "Mechanical", "Thermal", "Processing", "Physical", "Descriptive",
            "Composition / Reinforcement", "Architecture / Structure",
        ],
    }

    # Suggested property names, keyed by material class and then property type.
    property_names = {
        "Polymer": {
            "Thermal": ["Glass transition temperature (Tg)", "Melting temperature (Tm)",
                        "Crystallization temperature (Tc)", "Degree of crystallinity",
                        "Decomposition temperature"],
            "Mechanical": ["Tensile modulus", "Tensile strength", "Elongation at break",
                           "Flexural modulus", "Impact strength"],
            "Processing": ["Melt flow index (MFI)", "Processing temperature",
                           "Cooling rate", "Mold shrinkage"],
            "Physical": ["Density", "Specific gravity"],
            "Descriptive": ["Material grade", "Manufacturer"],
        },
        "Fiber": {
            "Mechanical": ["Tensile modulus", "Tensile strength", "Strain to failure"],
            "Physical": ["Density", "Fiber diameter"],
            "Thermal": ["Decomposition temperature"],
            "Descriptive": ["Fiber type", "Surface treatment"],
        },
        "Composite": {
            "Mechanical": ["Longitudinal modulus (E1)", "Transverse modulus (E2)",
                           "Shear modulus (G12)", "Poissons ratio (V12)",
                           "Tensile strength (fiber direction)", "Interlaminar shear strength"],
            "Thermal": ["Glass transition temperature (matrix)",
                        "Coefficient of thermal expansion (CTE)"],
            "Processing": ["Curing temperature", "Curing pressure"],
            "Physical": ["Density"],
            "Descriptive": ["Laminate type"],
            "Composition / Reinforcement": ["Fiber volume fraction", "Fiber weight fraction",
                                            "Fiber type", "Matrix type"],
            "Architecture / Structure": ["Weave type", "Ply orientation",
                                         "Number of plies", "Stacking sequence"],
        },
    }

    # The container key becomes the ``st-key-material_ident_card`` CSS hook
    # targeted by the page styles.
    with st.container(border=False, key="material_ident_card"):
        st.markdown(
            "<div class='ud-ident-title'>"
            "<span class='ud-sec-icon'>i</span>Material Identification</div>",
            unsafe_allow_html=True,
        )

        col_a, col_b = st.columns(2)
        with col_a:
            material_class = st.selectbox(
                "Material Class", ("Polymer", "Fiber", "Composite"),
                index=None, placeholder="Choose material class",
                key="manual_material_class",
            )
        with col_b:
            if material_class:
                property_category = st.selectbox(
                    "Property Type", property_categories[material_class],
                    index=None, placeholder="Choose property type",
                    key="manual_property_category",
                )
            else:
                # No class chosen yet: show a disabled placeholder widget
                # (under its own key) so the layout stays stable.
                property_category = None
                st.selectbox(
                    "Property Type", ["Choose material class first"],
                    index=0, disabled=True,
                    key="manual_property_category_disabled",
                )

        property_name = None
        if material_class and property_category:
            # "Something else" unlocks the free-text field below.
            property_options = property_names[material_class][property_category] + ["Something else"]
            property_name = st.selectbox(
                "Property Name", property_options,
                index=None, placeholder="Choose property",
                key="manual_property_name",
            )

        custom_property_name = ""
        if property_name == "Something else":
            custom_property_name = st.text_input(
                "Custom Property Name", placeholder="Type property name",
                key="manual_custom_property_name",
            ).strip()

        # Effective property name: the typed text for "Something else",
        # otherwise the selected suggestion (may be None / empty).
        selected_property_name = (
            custom_property_name if property_name == "Something else" else property_name
        )

    # The data-entry form is shown only after all three selections are made.
    if material_class and property_category and selected_property_name:
        with st.container(border=False, key="material_form_card"):
            with st.form("user_input"):
                st.subheader("Enter Data")
                material_name = st.text_input("Material Name")
                material_abbr = st.text_input("Material Abbreviation")
                value = st.text_input("Value")
                unit = st.text_input("Unit (SI)")
                english = st.text_input("English Units")
                test_condition = st.text_input("Test Condition")
                comments = st.text_area("Comments")
                submitted = st.form_submit_button("Submit")

            if submitted:
                # Only the material name and the value are mandatory.
                if not (material_name and value):
                    st.error("Material name and value are required.")
                    return False

                # One-row frame in the column layout passed to insert_material_rows.
                input_db = pd.DataFrame([{
                    "material_class": material_class,
                    "material_name": material_name,
                    "material_abbreviation": material_abbr,
                    "section": property_category,
                    "property_name": selected_property_name,
                    "value": value,
                    "unit": unit,
                    "english": english,
                    "test_condition": test_condition,
                    "comments": comments,
                }])

                try:
                    inserted = insert_material_rows(input_db)
                except Exception as exc:
                    st.error(f"Failed to save to PostgreSQL: {exc}")
                    return False

                if inserted <= 0:
                    st.error("No rows were inserted into PostgreSQL.")
                    return False

                # Drop Streamlit's data cache - presumably so cached DB reads
                # elsewhere pick up the new row (TODO confirm).
                st.cache_data.clear()
                st.success("Property added successfully to PostgreSQL.")
                st.dataframe(input_db)
                return True

        return False

    return False
571
+
572
+
573
+ # ─────────────────────────────────────────────────────────────────────────────
574
+ # Tab 1: Material Data
575
+ # Uses run_pipeline from doctodb_rag instead of call_gemini_from_bytes
576
+ # ─────────────────────────────────────────────────────────────────────────────
577
+
578
+ # def render_material_data_tab(pdf_path: str):
579
+ # st.subheader("Material Properties Data")
580
+
581
+ # if not st.session_state.pdf_data_extracted:
582
+ # with st.spinner("Extracting material data…"):
583
+ # with open(pdf_path, "rb") as f:
584
+ # pdf_bytes = f.read()
585
+
586
+
587
+ # df, df_gemini, df_gpt, _chunks, api_errors, meta = run_pipeline(pdf_bytes)
588
+
589
+ # if api_errors:
590
+ # for err in api_errors:
591
+ # st.warning(err)
592
+
593
+ # if not df.empty:
594
+ # # Build the metadata dict that the rest of the UI expects
595
+ # data = _df_to_meta(df)
596
+ # st.session_state.pdf_extracted_df = df
597
+ # st.session_state.pdf_data_extracted = True
598
+ # st.session_state.pdf_extracted_meta = data
599
+ # else:
600
+ # st.warning("No data extracted from PDF.")
601
+
602
+ # df = st.session_state.pdf_extracted_df
603
+
604
+ # if df.empty:
605
+ # return
606
+
607
+ # meta = st.session_state.get("pdf_extracted_meta", {})
608
+ # st.success(f"Extracted {len(df)} properties")
609
+
610
+ # col1, col2 = st.columns(2)
611
+ # col1.metric("Material", meta.get("material_name", "N/A"))
612
+ # col2.metric("Abbreviation", meta.get("material_abbreviation", "N/A"))
613
+
614
+ # st.dataframe(df, use_container_width=True, height=400)
615
+ # st.subheader("Assign Material Category")
616
+
617
+ # extracted_material_class = st.selectbox(
618
+ # "Select category for this material",
619
+ # ["Polymer", "Fiber", "Composite"],
620
+ # index=None,
621
+ # placeholder="Required before adding to database",
622
+ # key="tab1_material_class",
623
+ # )
624
+
625
+ # if st.button("+ Add to Database"):
626
+ # if not extracted_material_class:
627
+ # st.error("Please select a material category before adding.")
628
+ # return
629
+
630
+ # df["material_class"] = extracted_material_class
631
+ # df["material_type"] = extracted_material_class
632
+
633
+ # if st.session_state.image_results:
634
+ # with st.spinner("Saving matched plot images…"):
635
+ # saved_images = save_matched_images(
636
+ # df, st.session_state.image_results, save_dir="images"
637
+ # )
638
+ # if saved_images:
639
+ # st.success(f"Saved {len(saved_images)} plot image(s)")
640
+ # with st.expander("View saved images"):
641
+ # for img_info in saved_images:
642
+ # st.write(f"**{img_info['property']}** → {img_info['caption']}")
643
+ # st.write(f"Saved to: `{img_info['path']}`")
644
+ # else:
645
+ # st.info("No plots matched the extracted properties automatically.")
646
+
647
+ # st.session_state.setdefault("user_uploaded_data", pd.DataFrame())
648
+ # st.session_state["user_uploaded_data"] = pd.concat(
649
+ # [st.session_state["user_uploaded_data"], df], ignore_index=True
650
+ # )
651
+ # st.success(f"Added to {extracted_material_class} database!")
652
+ # ── Stage labels and estimated durations for the progress display ─────────────
653
+ _STAGE_LABELS = {
654
+ 0.00: ("Checking cache", 2),
655
+ 0.05: ("Extracting tables & text", 15),
656
+ 0.20: ("Extraction complete", 0),
657
+ 0.25: ("Indexing into ChromaDB", 8),
658
+ 0.40: ("Ranking chunks", 5),
659
+ 0.50: ("Ranking complete", 0),
660
+ 0.55: ("Building batches", 2),
661
+ 0.60: ("Running Gemini + GPT-4o", 30),
662
+ 0.90: ("Merging results", 3),
663
+ 0.95: ("Consensus filtering", 4),
664
+ 1.00: ("Done", 0),
665
+ }
666
+
667
+ def _nearest_stage_label(pct: float) -> tuple[str, int]:
668
+ """Return (label, est_seconds_remaining) for the closest stage."""
669
+ best_key = min(_STAGE_LABELS, key=lambda k: abs(k - pct))
670
+ return _STAGE_LABELS[best_key]
671
+
672
+
673
def render_material_data_tab(pdf_path: str):
    """
    Render the "Material Data" tab for an uploaded PDF.

    On the first run for a PDF (``st.session_state.pdf_data_extracted`` is
    falsy) the file is read and passed to ``run_pipeline`` while a progress
    bar, a stage label and an elapsed-time caption are updated through the
    pipeline's progress callback. A non-empty result is cached in session
    state (``pdf_extracted_df``, ``pdf_extracted_meta``, ``pdf_data_extracted``).

    Afterwards the cached table is shown, the user assigns a material class,
    and "+ Add to Database" appends the rows to
    ``st.session_state["user_uploaded_data"]`` and saves any plot images whose
    captions match an extracted property.

    Args:
        pdf_path: Path of the uploaded PDF on disk.
    """
    st.subheader("Material Properties Data")

    if not st.session_state.pdf_data_extracted:

        bar = st.progress(0.0)
        status = st.empty()   # stage label + pipeline message
        timer = st.empty()    # elapsed clock

        start_ts = time.time()

        def _cb(msg: str, pct: float):
            """Progress callback handed to run_pipeline."""
            elapsed = time.time() - start_ts
            label, est_remaining = _nearest_stage_label(pct)
            # st.progress only accepts floats in [0.0, 1.0]; clamp both ends
            # so an out-of-range report cannot abort the extraction.
            bar.progress(max(0.0, min(pct, 1.0)))
            status.markdown(
                f"**{label}** &nbsp;·&nbsp; <span style='color:#64748b'>{msg}</span>",
                unsafe_allow_html=True,
            )
            if est_remaining > 0:
                timer.caption(
                    f"⏱ Elapsed: {elapsed:.0f}s &nbsp;·&nbsp; "
                    f"Est. remaining: ~{est_remaining}s"
                )
            else:
                timer.caption(f"⏱ Elapsed: {elapsed:.0f}s")

        with open(pdf_path, "rb") as f:
            pdf_bytes = f.read()

        df, _, _, _, api_errors, meta = run_pipeline(
            pdf_bytes, progress_callback=_cb
        )
        elapsed_total = time.time() - start_ts
        bar.progress(1.0)
        status.empty()
        timer.empty()

        if api_errors:
            for err in api_errors:
                st.warning(err)

        if not df.empty:
            data = _df_to_meta(df)
            st.session_state.pdf_extracted_df = df
            st.session_state.pdf_data_extracted = True
            st.session_state.pdf_extracted_meta = data
            # Tolerate a pipeline that returns no metadata object.
            batches = (meta or {}).get("batches")
            st.success(
                f"✅ Extracted {len(df)} properties in {elapsed_total:.0f}s"
                + (f" · {batches} batch(es)" if batches else "")
            )
        else:
            st.warning("No data extracted from PDF.")
            return

    df = st.session_state.pdf_extracted_df
    if df.empty:
        return

    meta = st.session_state.get("pdf_extracted_meta", {})

    col1, col2 = st.columns(2)
    col1.metric("Material", meta.get("material_name", "N/A"))
    col2.metric("Abbreviation", meta.get("material_abbreviation", "N/A"))

    st.dataframe(df, use_container_width=True, height=400)
    st.subheader("Assign Material Category")

    extracted_material_class = st.selectbox(
        "Select category for this material",
        ["Polymer", "Fiber", "Composite"],
        index=None,
        placeholder="Required before adding to database",
        key="tab1_material_class",
    )

    if st.button("+ Add to Database"):
        if not extracted_material_class:
            st.error("Please select a material category before adding.")
            return

        # Work on a copy: ``df`` is the object cached in session state, and
        # adding columns in place would silently change the cached table that
        # this tab (and the plots tab) reads on the next rerun.
        df = df.copy()
        df["material_class"] = extracted_material_class
        df["material_type"] = extracted_material_class

        if st.session_state.image_results:
            with st.spinner("Saving matched plot images…"):
                saved_images = save_matched_images(
                    df, st.session_state.image_results, save_dir="images"
                )
            if saved_images:
                st.success(f"Saved {len(saved_images)} plot image(s)")
                with st.expander("View saved images"):
                    for img_info in saved_images:
                        st.write(f"**{img_info['property']}** → {img_info['caption']}")
                        st.write(f"Saved to: `{img_info['path']}`")
            else:
                st.info("No plots matched the extracted properties automatically.")

        # Accumulate the rows in the in-session store of user uploads.
        st.session_state.setdefault("user_uploaded_data", pd.DataFrame())
        st.session_state["user_uploaded_data"] = pd.concat(
            [st.session_state["user_uploaded_data"], df], ignore_index=True
        )
        st.success(f"Added to {extracted_material_class} database!")
776
+
777
+ # ─────────────────────────────────────────────────────────────────────────────
778
+ # Tab 2: Extracted Plots + AI Property Mapping
779
+ # Uses extract_images (adapter above) instead of upload_backend's version
780
+ # ─────────────────────────────────────────────────────────────────────────────
781
+
782
+ def render_plots_tab(pdf_path: str, paper_id: str):
783
+ st.subheader("Extracted Plot Images & Property Mapping")
784
+
785
+
786
+ if not st.session_state.pdf_processed:
787
+ with st.spinner("Extracting plots from PDF…"):
788
+ st.session_state.image_results = extract_images(pdf_path)
789
+ st.session_state.pdf_processed = True
790
+ st.session_state.mapping_done = False
791
+
792
+ image_results = st.session_state.image_results
793
+
794
+ if not image_results:
795
+ st.warning("No plots found in this PDF.")
796
+ return
797
+
798
+ has_data = not st.session_state.pdf_extracted_df.empty
799
+
800
+ if has_data:
801
+ mat_abbr = st.session_state.pdf_extracted_df.iloc[0]["material_abbreviation"]
802
+ property_list = st.session_state.pdf_extracted_df["property_name"].unique().tolist()
803
+ st.info(
804
+ f"**{len(image_results)} plots** extracted | "
805
+ f"Material: **{mat_abbr}** | "
806
+ f"{len(property_list)} properties available for mapping"
807
+ )
808
+ else:
809
+ st.warning(
810
+ "Extract material data in the **Material Data** tab first "
811
+ "to enable AI property mapping."
812
+ )
813
+
814
+ subtab_images, subtab_json = st.tabs(["🖼 Images & Mapping", "{ } JSON Preview"])
815
+
816
+ # ════════════════════════════════════════════════════════════════════════
817
+ with subtab_images:
818
+
819
+ col_img, col_json_dl, col_all = st.columns(3)
820
+ with col_img:
821
+ st.download_button(
822
+ "⬇ Images Only",
823
+ data=create_zip(image_results, include_json=False),
824
+ file_name=f"{paper_id}_images.zip",
825
+ mime="application/zip",
826
+ use_container_width=True,
827
+ key="dl_images",
828
+ )
829
+ with col_json_dl:
830
+ json_meta = [
831
+ {"caption": r["caption"], "page": r["page"],
832
+ "image_count": len(r["image_data"])}
833
+ for r in image_results
834
+ ]
835
+ st.download_button(
836
+ "⬇ JSON",
837
+ data=json.dumps(json_meta, indent=4),
838
+ file_name=f"{paper_id}_metadata.json",
839
+ mime="application/json",
840
+ use_container_width=True,
841
+ key="dl_json",
842
+ )
843
+ with col_all:
844
+ st.download_button(
845
+ "⬇ Download All",
846
+ data=create_zip(image_results, include_json=True),
847
+ file_name=f"{paper_id}_complete.zip",
848
+ mime="application/zip",
849
+ use_container_width=True,
850
+ key="dl_all",
851
+ )
852
+
853
+ st.divider()
854
+
855
+ if has_data:
856
+ col_cls, col_btn = st.columns([0.45, 0.55])
857
+
858
+ with col_cls:
859
+ map_class = st.selectbox(
860
+ "Material class for DB lookup",
861
+ ["Polymer", "Fiber", "Composite"],
862
+ key="mapping_material_class",
863
+ help="Routes to the correct PostgreSQL table.",
864
+ )
865
+
866
+ with col_btn:
867
+ st.write("")
868
+ st.write("")
869
+ run_mapping = st.button(
870
+ "🤖 Run AI Property Mapping",
871
+ type="primary",
872
+ disabled=st.session_state.get("mapping_done", False),
873
+ use_container_width=True,
874
+ )
875
+
876
+ if run_mapping:
877
+ df = st.session_state.pdf_extracted_df
878
+ mat_abbr = df.iloc[0]["material_abbreviation"]
879
+ extracted_json = st.session_state.get("pdf_extracted_meta", {})
880
+
881
+ with st.spinner("Fetching properties from PostgreSQL…"):
882
+ try:
883
+ db_properties = fetch_properties_for_material(
884
+ mat_abbr, map_class, fetch_all
885
+ )
886
+ except Exception as exc:
887
+ st.error(f"DB error: {exc}")
888
+ db_properties = []
889
+
890
+ if not db_properties:
891
+ st.warning(
892
+ f"No DB rows found for **{mat_abbr}** in the **{map_class}** table. "
893
+ "Mapping will use all available properties from the extracted data."
894
+ )
895
+
896
+ prog = st.progress(0, text="Starting…")
897
+
898
+ def _on_progress(i, total, caption):
899
+ pct = int((i / max(total, 1)) * 100)
900
+ prog.progress(pct, text=f"Mapping {i+1}/{total}: {caption[:55]}…")
901
+
902
+ with st.spinner("AI is analysing plots…"):
903
+ mapped = batch_map_plots(
904
+ image_results=image_results,
905
+ extracted_json=extracted_json,
906
+ db_properties=db_properties,
907
+ progress_callback=_on_progress,
908
+ )
909
+
910
+ prog.progress(100, text="Done ✓")
911
+ st.session_state.mapped_results = mapped
912
+ st.session_state.mapping_done = True
913
+ st.success(f"✅ Mapped {len(mapped)} plots — review below.")
914
+ st.rerun()
915
+
916
+ if st.session_state.get("mapping_done"):
917
+ col_info, col_reset = st.columns([0.78, 0.22])
918
+ col_info.caption(
919
+ "AI mapping complete. The dropdown for each plot is pre-filled "
920
+ "with the suggestion — override freely, then hit **Save**."
921
+ )
922
+ if col_reset.button("↺ Re-run Mapping", use_container_width=True):
923
+ st.session_state.mapping_done = False
924
+ st.session_state.mapped_results = []
925
+ st.rerun()
926
+
927
+ st.divider()
928
+
929
+ use_mapped = (
930
+ has_data
931
+ and st.session_state.get("mapping_done", False)
932
+ and bool(st.session_state.get("mapped_results"))
933
+ )
934
+ display_list = (
935
+ st.session_state.mapped_results if use_mapped else image_results
936
+ )
937
+
938
+ for idx in range(len(display_list)):
939
+ if idx >= len(display_list):
940
+ break
941
+
942
+ item = display_list[idx]
943
+ caption = item.get("caption", f"Figure {idx+1}")
944
+ page = item.get("page", "?")
945
+ img_list = item.get("image_data", [])
946
+ mapping = item.get("mapping_result") if use_mapped else None
947
+
948
+ with st.container(border=True):
949
+
950
+ col_cap, col_del = st.columns([0.87, 0.13])
951
+ col_cap.markdown(f"**Page {page}** — {caption}")
952
+ if col_del.button("🗑", key=f"del_grp_{idx}", help="Delete this figure"):
953
+ display_list.pop(idx)
954
+ if use_mapped:
955
+ st.session_state.mapped_results = display_list
956
+ else:
957
+ st.session_state.image_results = display_list
958
+ st.rerun()
959
+
960
+ if mapping:
961
+ prop_name = mapping.get("property_name", "")
962
+ section = mapping.get("section", "")
963
+ confidence = mapping.get("confidence", "low")
964
+ reasoning = mapping.get("reasoning", "")
965
+ db_row = mapping.get("db_row")
966
+ candidates = mapping.get("all_candidates", [])
967
+
968
+ if prop_name:
969
+ badge = _confidence_badge(confidence)
970
+ st.markdown(
971
+ f"🔗 **AI Match:** `{section}` › **{prop_name}** &nbsp; {badge}",
972
+ unsafe_allow_html=True,
973
+ )
974
+ if reasoning:
975
+ st.caption(f"💬 {reasoning}")
976
+
977
+ if db_row:
978
+ with st.expander("📋 Matched DB row", expanded=False):
979
+ c1, c2, c3 = st.columns(3)
980
+ c1.metric("Value", db_row.get("value", "—"))
981
+ c2.metric("Unit", db_row.get("unit", "—"))
982
+ c3.metric("Condition", db_row.get("test_condition", "—"))
983
+ if db_row.get("comments"):
984
+ st.caption(f"Comments: {db_row['comments']}")
985
+ if db_row.get("english"):
986
+ st.caption(f"English units: {db_row['english']}")
987
+
988
+ if candidates:
989
+ with st.expander("🔄 All candidates", expanded=False):
990
+ for c in candidates:
991
+ st.markdown(
992
+ f"{c.get('rank','?')}. `{c.get('section','?')}` › "
993
+ f"**{c.get('property_name','?')}** &nbsp; "
994
+ f"{_confidence_badge(c.get('confidence','low'))}",
995
+ unsafe_allow_html=True,
996
+ )
997
  else:
998
+ st.warning("⚠️ AI could not match this plot to any DB property.")
999
+
1000
+ for p_idx in range(len(img_list)):
1001
+ if p_idx >= len(item.get("image_data", [])):
1002
+ break
1003
+
1004
+ img_data = item["image_data"][p_idx]
1005
+ bgr = img_data.get("array")
1006
+ if bgr is None:
1007
+ continue
1008
+
1009
+ img_key = f"{idx}_{p_idx}_{page}"
1010
+ st.image(bgr, channels="BGR", width=420)
1011
+
1012
+ if has_data:
1013
+ df = st.session_state.pdf_extracted_df
1014
+ mat_abbr = df.iloc[0]["material_abbreviation"]
1015
+ property_list = df["property_name"].unique().tolist()
1016
+ options = ["— Select property —"] + property_list
1017
+
1018
+ ai_prop = mapping.get("property_name", "") if mapping else ""
1019
+ ai_section = mapping.get("section", "") if mapping else ""
1020
+ default_idx = (
1021
+ property_list.index(ai_prop) + 1
1022
+ if ai_prop in property_list else 0
1023
+ )
1024
+
1025
+ col_sel, col_sec, col_save, col_rem = st.columns(
1026
+ [0.40, 0.20, 0.20, 0.20]
1027
+ )
1028
+
1029
+ with col_sel:
1030
+ selected = st.selectbox(
1031
+ "Property",
1032
+ options=options,
1033
+ index=default_idx,
1034
+ key=f"prop_sel_{img_key}",
1035
+ label_visibility="collapsed",
1036
+ )
1037
+
1038
+ with col_sec:
1039
+ section_options = [
1040
+ "Mechanical",
1041
+ "Thermal",
1042
+ "Processing",
1043
+ "Physical",
1044
+ "Descriptive",
1045
+ "Composition / Reinforcement",
1046
+ "Architecture / Structure",
1047
  ]
1048
+ section_default = (
1049
+ section_options.index(ai_section)
1050
+ if ai_section in section_options
1051
+ else 0
1052
+ )
1053
+ section_val = st.selectbox(
1054
+ "Section",
1055
+ options=section_options,
1056
+ index=section_default,
1057
+ key=f"sec_{img_key}",
1058
+ label_visibility="collapsed",
1059
+ )
1060
+
1061
+ with col_save:
1062
+ if st.button("💾 Save", key=f"save_{img_key}",
1063
+ use_container_width=True):
1064
+ if selected and selected != "— Select property —":
1065
+
1066
+ filepath = save_plot_image_mapping(
1067
+ mat_abbr, selected, section_val,
1068
+ bgr, save_dir="images",
1069
+ )
1070
+
1071
+ try:
1072
+ from db import execute_query
1073
+ saved_to_db = save_plot_image_to_db(
1074
+ material_abbr=mat_abbr,
1075
+ property_name=selected,
1076
+ image_bgr=bgr,
1077
+ material_class=st.session_state.get(
1078
+ "mapping_material_class", "Polymer"
1079
+ ),
1080
+ execute_query_fn=execute_query,
1081
+ )
1082
+ if saved_to_db:
1083
+ st.success(
1084
+ f"✅ Saved to DB & disk → "
1085
+ f"`{os.path.basename(filepath)}`"
1086
+ )
1087
+ else:
1088
+ st.warning(
1089
+ "⚠️ Saved to disk only — "
1090
+ "no matching DB row found for this property."
1091
+ )
1092
+ except Exception as e:
1093
+ st.error(f"DB save failed: {e}")
1094
+ st.info(f"Saved locally → `{os.path.basename(filepath)}`")
1095
+
1096
+ st.session_state.saved_image_mapping[img_key] = {
1097
+ "property": selected,
1098
+ "section": section_val,
1099
+ "caption": caption,
1100
+ "filename": os.path.basename(filepath),
1101
+ "path": filepath,
1102
+ }
1103
+ st.rerun()
1104
+ else:
1105
+ st.warning("Select a property first.")
1106
+
1107
+ with col_rem:
1108
+ if st.button("✕", key=f"rem_{img_key}",
1109
+ use_container_width=True, help="Remove image"):
1110
+ if img_key in st.session_state.saved_image_mapping:
1111
+ del st.session_state.saved_image_mapping[img_key]
1112
+ item["image_data"].pop(p_idx)
1113
+ if not item["image_data"]:
1114
+ display_list.pop(idx)
1115
+ if use_mapped:
1116
+ st.session_state.mapped_results = display_list
1117
+ else:
1118
+ st.session_state.image_results = display_list
1119
+ st.rerun()
1120
+
1121
+ if img_key in st.session_state.saved_image_mapping:
1122
+ saved_m = st.session_state.saved_image_mapping[img_key]
1123
+ st.info(
1124
+ f"✅ Saved as **{saved_m['property']}** → "
1125
+ f"`{saved_m['filename']}`"
1126
+ )
1127
+
1128
+ else:
1129
+ col_msg, col_rem = st.columns([0.80, 0.20])
1130
+ col_msg.caption(
1131
+ "Go to **Material Data** tab to extract properties and enable mapping."
1132
  )
1133
+ if col_rem.button("✕", key=f"rem_nd_{img_key}", help="Remove"):
1134
+ item["image_data"].pop(p_idx)
1135
+ if not item["image_data"]:
1136
+ st.session_state.image_results.pop(idx)
1137
+ st.rerun()
1138
 
1139
+ st.divider()
 
 
 
 
1140
 
1141
+ saved_map = st.session_state.saved_image_mapping
1142
+ if saved_map:
1143
+ with st.expander(f"📁 Saved mappings ({len(saved_map)})", expanded=False):
1144
+ for key, info in saved_map.items():
1145
+ st.markdown(
1146
+ f"**{info['property']}** &nbsp;›&nbsp; `{info['filename']}` \n"
1147
+ f"<small style='color:#64748b'>Caption: {info['caption']}</small>",
1148
+ unsafe_allow_html=True,
1149
+ )
1150
 
1151
+ # ════════════════════════════════════════════════════════════════════════
1152
+ with subtab_json:
1153
+ st.subheader("Metadata Preview")
1154
+ json_data = [
1155
+ {
1156
+ "caption": r["caption"],
1157
+ "page": r["page"],
1158
+ "image_count": len(r["image_data"]),
1159
+ "images": [img["filename"] for img in r["image_data"]],
1160
+ }
1161
+ for r in image_results
1162
+ ]
1163
+ st.download_button(
1164
+ "⬇ Download JSON",
1165
+ data=json.dumps(json_data, indent=4),
1166
+ file_name="metadata.json",
1167
+ mime="application/json",
1168
+ key="dl_json_bottom",
1169
+ )
1170
+ st.json(json_data)
1171
 
 
1172
 
1173
+ # ─────────────────────────────────────────────────────────────────────────────
1174
+ # Main
1175
+ # ─────────────────────────────────────────────────────────────────────────────
1176
+
1177
def main():
    """Render the "Submit Scientific Material" upload page.

    Steps, in order:

    1. Inject the page styles, draw the top bar and the page heading.
    2. Seed ``st.session_state`` with a default for every key that is
       not present yet.
    3. Inside the ``ud_main_card`` container, render the input form and
       the PDF uploader.
    4. With no uploaded file, reset every default key and stop.
    5. When the uploaded file name differs from the stored one, reset
       the default keys and remember the new name.
    6. If the form was submitted on this run, show a notice and stop.
    7. Otherwise copy the upload to a temporary PDF, render both tabs
       from that path, and delete the temporary file afterwards.
    """
    inject_upload_page_styles()
    render_top_bar()

    st.subheader("Submit Scientific Material")
    st.caption("Provide technical data and research documentation for the central repository.")

    defaults = {
        "image_results": [],
        "mapped_results": [],
        "pdf_processed": False,
        "mapping_done": False,
        "current_pdf_name": None,
        "form_submitted": False,
        "pdf_data_extracted": False,
        "pdf_extracted_df": pd.DataFrame(),
        "pdf_extracted_meta": {},
        "saved_image_mapping": {},
    }

    def reset_state():
        """Overwrite every tracked session key with its default value."""
        for key, value in defaults.items():
            st.session_state[key] = value

    # Only fill in keys that are missing so existing state survives reruns.
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value

    # NOTE(review): the extent of this container was reconstructed from a
    # flattened diff; confirm the uploader and hint belong inside the card.
    with st.container(border=True, key="ud_main_card"):
        if input_form():
            st.session_state.form_submitted = True

        st.markdown(
            "<div class='ud-upload-title'>"
            "<span class='ud-sec-icon'>i</span>Research Documentation</div>",
            unsafe_allow_html=True,
        )

        uploaded_file = st.file_uploader(
            "Upload PDF (Material Datasheet or Research Paper)", type=["pdf"]
        )

        if not uploaded_file:
            st.info("Upload a PDF to extract material data and plots")

    if not uploaded_file:
        # Nothing to process: drop any state left over from a previous PDF.
        reset_state()
        return

    paper_id = os.path.splitext(uploaded_file.name)[0].replace(" ", "_")

    if st.session_state.current_pdf_name != uploaded_file.name:
        # A different file was uploaded: start from a clean slate.
        reset_state()
        st.session_state.current_pdf_name = uploaded_file.name

    if st.session_state.form_submitted:
        st.session_state.form_submitted = False
        st.info(
            "Form submitted. Previously extracted data has been saved. "
            "Upload again to process a new PDF."
        )
        st.tabs(["Material Data", "Extracted Plots"])
        return

    tab1, tab2 = st.tabs(["📊 Material Data", "🖼 Extracted Plots"])

    # Write to a stable temp file (avoids Windows WinError 267 on cleanup)
    tmp_file = tempfile.NamedTemporaryFile(
        suffix=".pdf", delete=False, prefix="matdb_"
    )
    pdf_path = tmp_file.name
    try:
        # The context manager closes the handle even if the write fails, so
        # the unlink below is never attempted on a still-open file.
        with tmp_file:
            tmp_file.write(uploaded_file.getbuffer())

        with tab1:
            render_material_data_tab(pdf_path)

        with tab2:
            render_plots_tab(pdf_path, paper_id)

    finally:
        # Best-effort cleanup: a file that cannot be removed must not break
        # the page, but only filesystem errors are silenced.
        try:
            os.unlink(pdf_path)
        except OSError:
            pass
1261
+
1262
+
1263
# Module-level entry point: the page is rendered whenever this file is executed.
+ main()
1264