Seth0330 committed on
Commit
e3ce562
·
verified ·
1 Parent(s): cdae312

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -83
app.py CHANGED
@@ -39,88 +39,7 @@ MODELS = {
39
  }
40
  }
41
 
42
- # Create tabs for different functionalities
43
- tab1, tab2 = st.tabs(["PDF Summarizer", "Invoice Extractor"])
44
-
45
- # PDF Summarizer Tab
46
- with tab1:
47
- st.title("PDF to Bullet Point Summarizer 🗟 🔍")
48
-
49
- # File uploader for the PDF
50
- uploaded_file = st.file_uploader("Upload your PDF document", type="pdf", key="pdf_uploader")
51
-
52
- # Slider for users to select the summarization extent
53
- summary_scale = st.slider("Select the extent of summarization (%)", min_value=1, max_value=100, value=20, key="summary_scale")
54
-
55
- # Submit button
56
- submit_button = st.button("Generate Summary", key="summary_button")
57
-
58
- # Check if the submit button is pressed
59
- if submit_button and uploaded_file is not None:
60
- with st.spinner('Processing...'):
61
- # Read the PDF content
62
- text = read_pdf(io.BytesIO(uploaded_file.getvalue()))
63
-
64
- # Extract key phrases from the text
65
- key_phrases = extract_key_phrases(text)
66
-
67
- # Score sentences based on the key phrases
68
- sentence_scores = score_sentences(text, key_phrases)
69
-
70
- # Determine the number of bullet points based on the selected summarization scale
71
- total_sentences = len(list(sentence_scores.keys()))
72
- num_points = max(1, total_sentences * summary_scale // 100)
73
-
74
- # Generate the bullet-point summary
75
- summary = summarize_text(sentence_scores, num_points=num_points)
76
-
77
- # Display the summary as bullet points
78
- st.subheader("Here's the summary: ")
79
- st.markdown(summary)
80
-
81
- # Invoice Extractor Tab
82
- with tab2:
83
- st.title("📋 Invoice Extractor from PDF")
84
- st.write("Upload an invoice PDF to extract key details")
85
-
86
- # Model selection
87
- model_choice = st.selectbox(
88
- "Select AI Model",
89
- list(MODELS.keys()),
90
- index=0,
91
- help="Choose which AI model to use for extraction",
92
- key="model_choice"
93
- )
94
-
95
- # File uploader for the invoice PDF
96
- invoice_pdf = st.file_uploader("Upload Invoice PDF", type="pdf", key="invoice_pdf_uploader")
97
-
98
- if st.button("Extract Invoice Information", key="invoice_button") and invoice_pdf is not None:
99
- with st.spinner('Reading PDF...'):
100
- # Read the PDF content
101
- invoice_text = read_pdf(io.BytesIO(invoice_pdf.getvalue()))
102
-
103
- # Process in status container
104
- with st.status("Processing...", expanded=True) as status:
105
- st.write(f"🤖 Querying {model_choice} API...")
106
- invoice_data = extract_invoice_info(model_choice, invoice_text)
107
-
108
- if invoice_data:
109
- status.update(label="✅ Extraction Complete!", state="complete")
110
- display_invoice_data(model_choice, invoice_data)
111
- else:
112
- status.update(label="❌ Extraction Failed", state="error")
113
- st.error("Failed to extract information. Try simplifying the text.")
114
-
115
- # Debug information outside the status container
116
- if invoice_data and "last_api_response" in st.session_state:
117
- with st.expander("Debug Information"):
118
- st.write("API Response:")
119
- st.json(st.session_state.last_api_response)
120
- st.write("Raw API Response:")
121
- st.code(st.session_state.get("last_api_response_raw", "No response"))
122
-
123
- # Invoice Extractor Functions
124
  def get_api_key(model_choice):
125
  """Get the appropriate API key based on model choice"""
126
  api_key_env = MODELS[model_choice]["api_key_env"]
@@ -438,4 +357,85 @@ def extract_invoice_info(model_choice, text):
438
  if field not in item:
439
  item[field] = None if field != "quantity" else 0
440
 
441
- return parsed_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
  }
41
 
42
+ # Define all invoice extraction functions first
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def get_api_key(model_choice):
44
  """Get the appropriate API key based on model choice"""
45
  api_key_env = MODELS[model_choice]["api_key_env"]
 
357
  if field not in item:
358
  item[field] = None if field != "quantity" else 0
359
 
360
+ return parsed_data
361
+
362
+ # Create tabs for different functionalities
363
+ tab1, tab2 = st.tabs(["PDF Summarizer", "Invoice Extractor"])
364
+
365
+ # PDF Summarizer Tab
366
+ with tab1:
367
+ st.title("PDF to Bullet Point Summarizer 🗟 🔍")
368
+
369
+ # File uploader for the PDF
370
+ uploaded_file = st.file_uploader("Upload your PDF document", type="pdf", key="pdf_uploader")
371
+
372
+ # Slider for users to select the summarization extent
373
+ summary_scale = st.slider("Select the extent of summarization (%)", min_value=1, max_value=100, value=20, key="summary_scale")
374
+
375
+ # Submit button
376
+ submit_button = st.button("Generate Summary", key="summary_button")
377
+
378
+ # Check if the submit button is pressed
379
+ if submit_button and uploaded_file is not None:
380
+ with st.spinner('Processing...'):
381
+ # Read the PDF content
382
+ text = read_pdf(io.BytesIO(uploaded_file.getvalue()))
383
+
384
+ # Extract key phrases from the text
385
+ key_phrases = extract_key_phrases(text)
386
+
387
+ # Score sentences based on the key phrases
388
+ sentence_scores = score_sentences(text, key_phrases)
389
+
390
+ # Determine the number of bullet points based on the selected summarization scale
391
+ total_sentences = len(list(sentence_scores.keys()))
392
+ num_points = max(1, total_sentences * summary_scale // 100)
393
+
394
+ # Generate the bullet-point summary
395
+ summary = summarize_text(sentence_scores, num_points=num_points)
396
+
397
+ # Display the summary as bullet points
398
+ st.subheader("Here's the summary: ")
399
+ st.markdown(summary)
400
+
401
+ # Invoice Extractor Tab
402
+ with tab2:
403
+ st.title("📋 Invoice Extractor from PDF")
404
+ st.write("Upload an invoice PDF to extract key details")
405
+
406
+ # Model selection
407
+ model_choice = st.selectbox(
408
+ "Select AI Model",
409
+ list(MODELS.keys()),
410
+ index=0,
411
+ help="Choose which AI model to use for extraction",
412
+ key="model_choice"
413
+ )
414
+
415
+ # File uploader for the invoice PDF
416
+ invoice_pdf = st.file_uploader("Upload Invoice PDF", type="pdf", key="invoice_pdf_uploader")
417
+
418
+ if st.button("Extract Invoice Information", key="invoice_button") and invoice_pdf is not None:
419
+ with st.spinner('Reading PDF...'):
420
+ # Read the PDF content
421
+ invoice_text = read_pdf(io.BytesIO(invoice_pdf.getvalue()))
422
+
423
+ # Process in status container
424
+ with st.status("Processing...", expanded=True) as status:
425
+ st.write(f"🤖 Querying {model_choice} API...")
426
+ invoice_data = extract_invoice_info(model_choice, invoice_text)
427
+
428
+ if invoice_data:
429
+ status.update(label="✅ Extraction Complete!", state="complete")
430
+ display_invoice_data(model_choice, invoice_data)
431
+ else:
432
+ status.update(label="❌ Extraction Failed", state="error")
433
+ st.error("Failed to extract information. Try simplifying the text.")
434
+
435
+ # Debug information outside the status container
436
+ if invoice_data and "last_api_response" in st.session_state:
437
+ with st.expander("Debug Information"):
438
+ st.write("API Response:")
439
+ st.json(st.session_state.last_api_response)
440
+ st.write("Raw API Response:")
441
+ st.code(st.session_state.get("last_api_response_raw", "No response"))