Ankushbl6 committed on
Commit
3b295dc
·
verified ·
1 Parent(s): 96e4289

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +71 -8
src/streamlit_app.py CHANGED
@@ -1,6 +1,6 @@
1
  # =========================
2
  # Invoice Extractor (Qwen3-VL via RunPod vLLM) - Batch Mode with Tax Validation
3
- # UPDATED: Fixed tax percentage parsing - no thousands separator logic
4
  # =========================
5
  import os
6
  from pathlib import Path
@@ -88,6 +88,68 @@ def ensure_state(k: str, default):
88
  if k not in st.session_state:
89
  st.session_state[k] = default
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  def clean_tax_percentage(x) -> float:
92
  """
93
  Parse tax percentage - ALWAYS treats periods as decimals, never as thousands.
@@ -797,7 +859,7 @@ def parse_vllm_json(raw_json_text):
797
  result["Itemized Data"].append({
798
  "Description": item.get("descriptions", ""),
799
  "SKU": item.get("SKU", ""),
800
- "Quantity": clean_amount(item.get("quantity", "0")),
801
  "Unit Price": clean_amount(item.get("unit_price", "0")),
802
  "Amount": clean_amount(item.get("amount", "0")),
803
  "Tax": clean_amount(raw_tax),
@@ -1059,7 +1121,7 @@ def map_prediction_to_ui(pred):
1059
 
1060
  return {
1061
  "Description": str(desc).strip(),
1062
- "Quantity": float(clean_number(qty)),
1063
  "Unit Price": float(clean_number(unit_price)),
1064
  "Amount": float(clean_number(amount)),
1065
  "Tax": float(clean_number(tax)),
@@ -1190,7 +1252,7 @@ def map_prediction_to_ui(pred):
1190
  if desc or amt or qty or unit_price:
1191
  items_rows.append({
1192
  "Description": str(desc or ""),
1193
- "Quantity": float(clean_number(qty)),
1194
  "Unit Price": float(clean_number(unit_price)),
1195
  "Amount": float(clean_number(amt)),
1196
  "Tax": float(clean_number(pick_first("tax", "tax_amount") or 0.0)),
@@ -1617,10 +1679,11 @@ elif len(st.session_state.batch_results) > 0:
1617
  if st.session_state.get(f"Currency_{selected_hash}") == 'Other':
1618
  st.text_input("Specify Currency", key=f"Currency_Custom_{selected_hash}")
1619
 
1620
- st.number_input("Subtotal", key=f"Subtotal_{selected_hash}", format="%.2f")
1621
- st.number_input("Tax %", key=f"Tax Percentage_{selected_hash}", format="%.4f")
1622
- st.number_input("Total Tax", key=f"Total Tax_{selected_hash}", format="%.2f")
1623
- st.number_input("Total Amount", key=f"Total Amount_{selected_hash}", format="%.2f")
 
1624
 
1625
  with tabs[1]:
1626
  st.text_input("Sender Name", key=f"Sender Name_{selected_hash}")
 
1
  # =========================
2
  # Invoice Extractor (Qwen3-VL via RunPod vLLM) - Batch Mode with Tax Validation
3
+ # UPDATED: Fixed tax percentage parsing + time format quantities + scrolling issue
4
  # =========================
5
  import os
6
  from pathlib import Path
 
88
  if k not in st.session_state:
89
  st.session_state[k] = default
90
 
91
+ def parse_time_to_minutes(x) -> float:
92
+ """
93
+ Parse time format quantities to minutes.
94
+
95
+ Examples:
96
+ "0:35" → 35.0 (0 hours, 35 minutes = 35 minutes)
97
+ "1:30" → 90.0 (1 hour, 30 minutes = 90 minutes)
98
+ "2:15" → 135.0 (2 hours, 15 minutes = 135 minutes)
99
+ "0:05" → 5.0 (5 minutes)
100
+ "123" → 0.0 (string that is not in time format; the caller is expected to parse regular numbers)
101
+ "1.5" → 0.0 (decimal string, not time format; the caller is expected to parse regular numbers)
102
+ """
103
+ if x is None:
104
+ return 0.0
105
+ if isinstance(x, (int, float)):
106
+ return float(x)
107
+
108
+ s = str(x).strip()
109
+ if s == "":
110
+ return 0.0
111
+
112
+ # Check if it's in time format (H:MM or HH:MM)
113
+ time_pattern = r'^(\d+):(\d{1,2})$'
114
+ match = re.match(time_pattern, s)
115
+
116
+ if match:
117
+ hours = int(match.group(1))
118
+ minutes = int(match.group(2))
119
+ total_minutes = (hours * 60) + minutes
120
+ return float(total_minutes)
121
+
122
+ # Not time format: return 0.0 so the caller can fall back to regular number parsing
123
+ return 0.0
124
+
125
+ def clean_quantity(x) -> float:
126
+ """
127
+ Parse quantity - handles both time format (H:MM) and regular numbers.
128
+
129
+ Examples:
130
+ "0:35" → 35.0 (time format: 35 minutes)
131
+ "1:30" → 90.0 (time format: 90 minutes)
132
+ "123" → 123.0 (regular number)
133
+ "1,234.56" → 1234.56 (US format with decimals)
134
+ "1.234,56" → 1234.56 (EUR format with decimals)
135
+ """
136
+ if x is None:
137
+ return 0.0
138
+ if isinstance(x, (int, float)):
139
+ return float(x)
140
+
141
+ s = str(x).strip()
142
+ if s == "":
143
+ return 0.0
144
+
145
+ # First check if it's time format (H:MM or HH:MM)
146
+ time_value = parse_time_to_minutes(s)
147
+ if time_value > 0.0:
148
+ return time_value
149
+
150
+ # Not time format, use regular number parsing
151
+ return clean_float(s)
152
+
153
  def clean_tax_percentage(x) -> float:
154
  """
155
  Parse tax percentage - ALWAYS treats periods as decimals, never as thousands.
 
859
  result["Itemized Data"].append({
860
  "Description": item.get("descriptions", ""),
861
  "SKU": item.get("SKU", ""),
862
+ "Quantity": clean_quantity(item.get("quantity", "0")), # ✅ USE clean_quantity for time format support
863
  "Unit Price": clean_amount(item.get("unit_price", "0")),
864
  "Amount": clean_amount(item.get("amount", "0")),
865
  "Tax": clean_amount(raw_tax),
 
1121
 
1122
  return {
1123
  "Description": str(desc).strip(),
1124
+ "Quantity": float(clean_quantity(qty)), # ✅ USE clean_quantity for time format support
1125
  "Unit Price": float(clean_number(unit_price)),
1126
  "Amount": float(clean_number(amount)),
1127
  "Tax": float(clean_number(tax)),
 
1252
  if desc or amt or qty or unit_price:
1253
  items_rows.append({
1254
  "Description": str(desc or ""),
1255
+ "Quantity": float(clean_quantity(qty)), # ✅ USE clean_quantity for time format support
1256
  "Unit Price": float(clean_number(unit_price)),
1257
  "Amount": float(clean_number(amt)),
1258
  "Tax": float(clean_number(pick_first("tax", "tax_amount") or 0.0)),
 
1679
  if st.session_state.get(f"Currency_{selected_hash}") == 'Other':
1680
  st.text_input("Specify Currency", key=f"Currency_Custom_{selected_hash}")
1681
 
1682
+ # ✅ FIX: Add step=None to prevent scroll interference
1683
+ st.number_input("Subtotal", key=f"Subtotal_{selected_hash}", format="%.2f", step=None)
1684
+ st.number_input("Tax %", key=f"Tax Percentage_{selected_hash}", format="%.4f", step=None)
1685
+ st.number_input("Total Tax", key=f"Total Tax_{selected_hash}", format="%.2f", step=None)
1686
+ st.number_input("Total Amount", key=f"Total Amount_{selected_hash}", format="%.2f", step=None)
1687
 
1688
  with tabs[1]:
1689
  st.text_input("Sender Name", key=f"Sender Name_{selected_hash}")