Update app.py
Browse files
app.py
CHANGED
|
@@ -1016,9 +1016,28 @@ class HuggingFaceChatBot:
|
|
| 1016 |
# FIXED APPLICATION WITH UNIQUE WIDGET KEYS
|
| 1017 |
# ===============================================================================
|
| 1018 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1019 |
def create_huggingface_app():
|
| 1020 |
"""Main Streamlit application optimized for Hugging Face Spaces"""
|
| 1021 |
|
|
|
|
|
|
|
|
|
|
| 1022 |
# Custom CSS for better UI
|
| 1023 |
st.markdown("""
|
| 1024 |
<style>
|
|
@@ -1040,14 +1059,6 @@ def create_huggingface_app():
|
|
| 1040 |
.status-ok { color: #28a745; font-weight: bold; }
|
| 1041 |
.status-warning { color: #ffc107; font-weight: bold; }
|
| 1042 |
.status-error { color: #dc3545; font-weight: bold; }
|
| 1043 |
-
.chat-container {
|
| 1044 |
-
border: 1px solid #ddd;
|
| 1045 |
-
border-radius: 10px;
|
| 1046 |
-
padding: 1rem;
|
| 1047 |
-
margin: 1rem 0;
|
| 1048 |
-
max-height: 400px;
|
| 1049 |
-
overflow-y: auto;
|
| 1050 |
-
}
|
| 1051 |
</style>
|
| 1052 |
""", unsafe_allow_html=True)
|
| 1053 |
|
|
@@ -1064,31 +1075,38 @@ def create_huggingface_app():
|
|
| 1064 |
# Initialize processor
|
| 1065 |
if 'hf_processor' not in st.session_state:
|
| 1066 |
with st.spinner("π§ Initializing AI Invoice Processor..."):
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1070 |
|
| 1071 |
# Sidebar with system status
|
| 1072 |
with st.sidebar:
|
| 1073 |
st.header("ποΈ System Status")
|
| 1074 |
|
| 1075 |
-
# Check component status
|
| 1076 |
processor = st.session_state.hf_processor
|
| 1077 |
|
| 1078 |
-
# Document processing
|
| 1079 |
-
if processor.document_processor.processors:
|
| 1080 |
st.markdown('<span class="status-ok">β
Document Processing</span>', unsafe_allow_html=True)
|
| 1081 |
else:
|
| 1082 |
st.markdown('<span class="status-error">β Document Processing</span>', unsafe_allow_html=True)
|
| 1083 |
|
| 1084 |
-
# AI extraction
|
| 1085 |
-
if processor.ai_extractor.use_transformers:
|
| 1086 |
st.markdown('<span class="status-ok">β
AI Extraction</span>', unsafe_allow_html=True)
|
| 1087 |
else:
|
| 1088 |
st.markdown('<span class="status-warning">β οΈ Regex Extraction</span>', unsafe_allow_html=True)
|
| 1089 |
|
| 1090 |
-
# Vector search
|
| 1091 |
-
if processor.vector_store and processor.vector_store.embedding_model:
|
| 1092 |
st.markdown('<span class="status-ok">β
Semantic Search</span>', unsafe_allow_html=True)
|
| 1093 |
else:
|
| 1094 |
st.markdown('<span class="status-warning">β οΈ Keyword Search Only</span>', unsafe_allow_html=True)
|
|
@@ -1102,33 +1120,38 @@ def create_huggingface_app():
|
|
| 1102 |
|
| 1103 |
st.metric("Total Invoices", total_invoices)
|
| 1104 |
st.metric("Total Value", f"βΉ{total_amount:,.2f}")
|
| 1105 |
-
|
|
|
|
|
|
|
|
|
|
| 1106 |
|
| 1107 |
except Exception as e:
|
| 1108 |
st.error(f"Stats error: {e}")
|
| 1109 |
|
| 1110 |
# Processing info
|
| 1111 |
-
st.header("βοΈ
|
| 1112 |
st.info(f"""
|
| 1113 |
-
**
|
| 1114 |
-
|
| 1115 |
-
|
| 1116 |
-
β’
|
|
|
|
|
|
|
| 1117 |
""")
|
| 1118 |
|
| 1119 |
-
# Main
|
| 1120 |
-
|
| 1121 |
-
"
|
| 1122 |
-
"π¬ AI Chat",
|
| 1123 |
-
|
| 1124 |
-
"
|
| 1125 |
-
|
| 1126 |
|
| 1127 |
# -------------------------------------------------------------------------
|
| 1128 |
-
#
|
| 1129 |
# -------------------------------------------------------------------------
|
| 1130 |
|
| 1131 |
-
|
| 1132 |
st.header("π€ Upload Invoice Documents")
|
| 1133 |
|
| 1134 |
# Feature highlights
|
|
@@ -1158,83 +1181,64 @@ def create_huggingface_app():
|
|
| 1158 |
</div>
|
| 1159 |
""", unsafe_allow_html=True)
|
| 1160 |
|
| 1161 |
-
# File upload interface
|
| 1162 |
st.markdown("### π Upload Your Invoices")
|
| 1163 |
|
|
|
|
|
|
|
|
|
|
| 1164 |
uploaded_files = st.file_uploader(
|
| 1165 |
"Choose invoice files (PDF, TXT supported)",
|
| 1166 |
type=['pdf', 'txt'],
|
| 1167 |
accept_multiple_files=True,
|
| 1168 |
-
help=
|
| 1169 |
-
key="
|
| 1170 |
)
|
| 1171 |
|
| 1172 |
if uploaded_files:
|
| 1173 |
-
|
| 1174 |
-
if len(uploaded_files) >
|
| 1175 |
-
st.warning(f"β οΈ Too many files selected. Processing first {
|
| 1176 |
-
uploaded_files = uploaded_files[:
|
| 1177 |
|
| 1178 |
st.info(f"π {len(uploaded_files)} files selected")
|
| 1179 |
|
| 1180 |
-
if st.button("π Process Files", type="primary",
|
| 1181 |
-
|
| 1182 |
-
status_container = st.container()
|
| 1183 |
-
results_container = st.container()
|
| 1184 |
-
|
| 1185 |
-
successful = 0
|
| 1186 |
-
failed = 0
|
| 1187 |
-
|
| 1188 |
-
for i, uploaded_file in enumerate(uploaded_files):
|
| 1189 |
-
progress_bar.progress((i + 1) / len(uploaded_files))
|
| 1190 |
-
|
| 1191 |
-
with status_container:
|
| 1192 |
-
st.info(f"Processing: {uploaded_file.name}")
|
| 1193 |
-
|
| 1194 |
-
# Process file
|
| 1195 |
-
result = st.session_state.hf_processor.process_uploaded_file(uploaded_file)
|
| 1196 |
-
|
| 1197 |
-
with results_container:
|
| 1198 |
-
if result.invoice_number:
|
| 1199 |
-
successful += 1
|
| 1200 |
-
with st.expander(f"β
{uploaded_file.name}", expanded=False, key=f"result_expander_{i}"):
|
| 1201 |
-
col1, col2 = st.columns(2)
|
| 1202 |
-
with col1:
|
| 1203 |
-
st.write(f"**Invoice #:** {result.invoice_number}")
|
| 1204 |
-
st.write(f"**Supplier:** {result.supplier_name}")
|
| 1205 |
-
st.write(f"**Amount:** βΉ{result.amount:.2f}")
|
| 1206 |
-
with col2:
|
| 1207 |
-
st.write(f"**Date:** {result.date}")
|
| 1208 |
-
st.write(f"**Method:** {result.processing_method}")
|
| 1209 |
-
st.write(f"**Confidence:** {result.extraction_confidence:.1%}")
|
| 1210 |
-
else:
|
| 1211 |
-
failed += 1
|
| 1212 |
-
st.warning(f"β οΈ Could not extract data from {uploaded_file.name}")
|
| 1213 |
-
|
| 1214 |
-
# Final status
|
| 1215 |
-
with status_container:
|
| 1216 |
-
st.success(f"β
Processing complete! {successful} successful, {failed} failed")
|
| 1217 |
-
|
| 1218 |
-
if successful > 0:
|
| 1219 |
-
st.balloons()
|
| 1220 |
|
| 1221 |
# -------------------------------------------------------------------------
|
| 1222 |
-
#
|
| 1223 |
# -------------------------------------------------------------------------
|
| 1224 |
|
| 1225 |
-
|
| 1226 |
st.header("π¬ AI Chat Interface")
|
| 1227 |
|
| 1228 |
-
# Display chat history
|
| 1229 |
if st.session_state.chat_history:
|
| 1230 |
st.markdown("### π¬ Chat History")
|
| 1231 |
-
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1236 |
|
| 1237 |
-
# Suggested queries
|
| 1238 |
if not st.session_state.chat_history:
|
| 1239 |
st.markdown("### π‘ Try These Queries")
|
| 1240 |
|
|
@@ -1242,85 +1246,39 @@ def create_huggingface_app():
|
|
| 1242 |
|
| 1243 |
with col1:
|
| 1244 |
st.markdown("**π Basic Queries:**")
|
| 1245 |
-
|
| 1246 |
"Show me a summary of all invoices",
|
| 1247 |
"How much have we spent in total?",
|
| 1248 |
"Who are our top suppliers?",
|
| 1249 |
"Find invoices with high amounts"
|
| 1250 |
]
|
| 1251 |
-
for i, query in enumerate(
|
| 1252 |
-
if st.button(query, key=f"
|
| 1253 |
-
|
| 1254 |
-
response = st.session_state.hf_chatbot.query_database(query)
|
| 1255 |
-
st.session_state.chat_history.append({"role": "assistant", "content": response, "timestamp": datetime.now()})
|
| 1256 |
-
st.rerun()
|
| 1257 |
|
| 1258 |
with col2:
|
| 1259 |
-
st.markdown("**π
|
| 1260 |
-
|
| 1261 |
-
|
| 1262 |
-
|
| 1263 |
-
|
| 1264 |
-
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
|
| 1269 |
-
st.session_state.chat_history.append({"role": "user", "content": query, "timestamp": datetime.now()})
|
| 1270 |
-
response = st.session_state.hf_chatbot.query_database(query)
|
| 1271 |
-
st.session_state.chat_history.append({"role": "assistant", "content": response, "timestamp": datetime.now()})
|
| 1272 |
-
st.rerun()
|
| 1273 |
-
else:
|
| 1274 |
-
st.info("Semantic search not available. Upload some invoices first!")
|
| 1275 |
-
|
| 1276 |
-
# Alternative input method using text input and button
|
| 1277 |
-
st.markdown("### βοΈ Ask a Question")
|
| 1278 |
-
|
| 1279 |
-
col1, col2 = st.columns([4, 1])
|
| 1280 |
-
|
| 1281 |
-
with col1:
|
| 1282 |
-
user_input = st.text_input(
|
| 1283 |
-
"Type your question here:",
|
| 1284 |
-
placeholder="e.g., 'show me total spending' or 'find technology purchases'",
|
| 1285 |
-
key="chat_text_input_tab" # UNIQUE KEY ADDED
|
| 1286 |
-
)
|
| 1287 |
-
|
| 1288 |
-
with col2:
|
| 1289 |
-
ask_button = st.button("π Ask", type="primary", use_container_width=True, key="ask_button_tab")
|
| 1290 |
-
|
| 1291 |
-
# Process the input
|
| 1292 |
-
if ask_button and user_input:
|
| 1293 |
-
# Add user message
|
| 1294 |
-
st.session_state.chat_history.append({
|
| 1295 |
-
"role": "user",
|
| 1296 |
-
"content": user_input,
|
| 1297 |
-
"timestamp": datetime.now()
|
| 1298 |
-
})
|
| 1299 |
-
|
| 1300 |
-
# Get AI response
|
| 1301 |
-
with st.spinner("π€ AI is analyzing..."):
|
| 1302 |
-
response = st.session_state.hf_chatbot.query_database(user_input)
|
| 1303 |
-
|
| 1304 |
-
st.session_state.chat_history.append({
|
| 1305 |
-
"role": "assistant",
|
| 1306 |
-
"content": response,
|
| 1307 |
-
"timestamp": datetime.now()
|
| 1308 |
-
})
|
| 1309 |
-
|
| 1310 |
-
# Clear the input and rerun to show new messages
|
| 1311 |
-
st.rerun()
|
| 1312 |
|
| 1313 |
-
# Clear chat
|
| 1314 |
if st.session_state.chat_history:
|
| 1315 |
-
if st.button("ποΈ Clear Chat
|
| 1316 |
st.session_state.chat_history = []
|
| 1317 |
st.rerun()
|
| 1318 |
|
| 1319 |
# -------------------------------------------------------------------------
|
| 1320 |
-
#
|
| 1321 |
# -------------------------------------------------------------------------
|
| 1322 |
|
| 1323 |
-
|
| 1324 |
st.header("π Analytics Dashboard")
|
| 1325 |
|
| 1326 |
try:
|
|
@@ -1365,38 +1323,30 @@ def create_huggingface_app():
|
|
| 1365 |
title="Invoice Amount Distribution",
|
| 1366 |
labels={'amount': 'Amount (βΉ)', 'count': 'Number of Invoices'}
|
| 1367 |
)
|
| 1368 |
-
st.plotly_chart(fig_hist, use_container_width=True
|
| 1369 |
|
| 1370 |
# Top suppliers
|
| 1371 |
if df['supplier_name'].notna().any():
|
| 1372 |
supplier_amounts = df.groupby('supplier_name')['amount'].sum().sort_values(ascending=False).head(10)
|
| 1373 |
|
| 1374 |
-
|
| 1375 |
-
|
| 1376 |
-
|
| 1377 |
-
|
| 1378 |
-
|
| 1379 |
-
|
| 1380 |
-
|
| 1381 |
-
|
| 1382 |
-
|
| 1383 |
-
# Confidence analysis
|
| 1384 |
-
fig_confidence = px.histogram(
|
| 1385 |
-
df,
|
| 1386 |
-
x='confidence',
|
| 1387 |
-
title="Extraction Confidence Distribution",
|
| 1388 |
-
labels={'confidence': 'Confidence Score', 'count': 'Number of Invoices'}
|
| 1389 |
-
)
|
| 1390 |
-
st.plotly_chart(fig_confidence, use_container_width=True, key="confidence_chart")
|
| 1391 |
|
| 1392 |
except Exception as e:
|
| 1393 |
st.error(f"Analytics error: {e}")
|
| 1394 |
|
| 1395 |
# -------------------------------------------------------------------------
|
| 1396 |
-
#
|
| 1397 |
# -------------------------------------------------------------------------
|
| 1398 |
|
| 1399 |
-
|
| 1400 |
st.header("π Data Explorer")
|
| 1401 |
|
| 1402 |
try:
|
|
@@ -1407,7 +1357,7 @@ def create_huggingface_app():
|
|
| 1407 |
st.info("π No data available. Upload some invoices first.")
|
| 1408 |
return
|
| 1409 |
|
| 1410 |
-
# Convert to DataFrame
|
| 1411 |
df_data = []
|
| 1412 |
for inv in invoices:
|
| 1413 |
df_data.append({
|
|
@@ -1424,19 +1374,19 @@ def create_huggingface_app():
|
|
| 1424 |
|
| 1425 |
df = pd.DataFrame(df_data)
|
| 1426 |
|
| 1427 |
-
# Filters
|
| 1428 |
col1, col2, col3 = st.columns(3)
|
| 1429 |
|
| 1430 |
with col1:
|
| 1431 |
suppliers = ['All'] + sorted(df['Supplier'].dropna().unique().tolist())
|
| 1432 |
-
selected_supplier = st.selectbox("Filter by Supplier", suppliers, key="
|
| 1433 |
|
| 1434 |
with col2:
|
| 1435 |
methods = ['All'] + sorted(df['Method'].dropna().unique().tolist())
|
| 1436 |
-
selected_method = st.selectbox("Filter by Method", methods, key="
|
| 1437 |
|
| 1438 |
with col3:
|
| 1439 |
-
min_amount = st.number_input("Min Amount", min_value=0.0, value=0.0, key="
|
| 1440 |
|
| 1441 |
# Apply filters
|
| 1442 |
filtered_df = df.copy()
|
|
@@ -1447,33 +1397,32 @@ def create_huggingface_app():
|
|
| 1447 |
if min_amount > 0:
|
| 1448 |
filtered_df = filtered_df[filtered_df['Amount'] >= min_amount]
|
| 1449 |
|
| 1450 |
-
# Display
|
| 1451 |
st.dataframe(
|
| 1452 |
filtered_df,
|
| 1453 |
use_container_width=True,
|
| 1454 |
column_config={
|
| 1455 |
"Amount": st.column_config.NumberColumn("Amount", format="βΉ%.2f"),
|
| 1456 |
"Confidence": st.column_config.ProgressColumn("Confidence", min_value=0, max_value=1)
|
| 1457 |
-
}
|
| 1458 |
-
key="data_explorer_table" # UNIQUE KEY ADDED
|
| 1459 |
)
|
| 1460 |
|
| 1461 |
-
# Export options
|
| 1462 |
col1, col2 = st.columns(2)
|
| 1463 |
|
| 1464 |
with col1:
|
| 1465 |
-
if st.button("π₯ Export CSV",
|
| 1466 |
csv_data = filtered_df.to_csv(index=False)
|
| 1467 |
st.download_button(
|
| 1468 |
"Download CSV",
|
| 1469 |
csv_data,
|
| 1470 |
f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
|
| 1471 |
"text/csv",
|
| 1472 |
-
key="
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
with col2:
|
| 1476 |
-
if st.button("π Export JSON",
|
| 1477 |
filtered_invoices = [inv for inv in invoices
|
| 1478 |
if inv.get('invoice_number') in filtered_df['Invoice Number'].values]
|
| 1479 |
|
|
@@ -1488,36 +1437,99 @@ def create_huggingface_app():
|
|
| 1488 |
json.dumps(export_data, indent=2),
|
| 1489 |
f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.json",
|
| 1490 |
"application/json",
|
| 1491 |
-
key="
|
| 1492 |
)
|
| 1493 |
|
| 1494 |
except Exception as e:
|
| 1495 |
st.error(f"Data explorer error: {e}")
|
| 1496 |
|
| 1497 |
# -------------------------------------------------------------------------
|
| 1498 |
-
# GLOBAL CHAT INPUT
|
| 1499 |
# -------------------------------------------------------------------------
|
| 1500 |
|
| 1501 |
-
# Add some spacing
|
| 1502 |
st.markdown("---")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1503 |
|
| 1504 |
-
|
| 1505 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1506 |
|
| 1507 |
-
|
| 1508 |
-
|
| 1509 |
|
| 1510 |
-
|
| 1511 |
-
|
| 1512 |
-
|
| 1513 |
-
|
| 1514 |
-
"
|
| 1515 |
-
|
| 1516 |
-
|
| 1517 |
-
|
| 1518 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1519 |
with st.spinner("π€ AI is analyzing..."):
|
| 1520 |
-
response = st.session_state.hf_chatbot.query_database(
|
| 1521 |
|
| 1522 |
st.session_state.chat_history.append({
|
| 1523 |
"role": "assistant",
|
|
@@ -1525,22 +1537,56 @@ def create_huggingface_app():
|
|
| 1525 |
"timestamp": datetime.now()
|
| 1526 |
})
|
| 1527 |
|
| 1528 |
-
# Show
|
| 1529 |
-
|
| 1530 |
-
st.
|
|
|
|
|
|
|
| 1531 |
|
| 1532 |
-
|
| 1533 |
-
|
| 1534 |
-
|
| 1535 |
-
|
| 1536 |
-
|
| 1537 |
-
|
| 1538 |
-
|
| 1539 |
-
|
| 1540 |
-
|
| 1541 |
-
|
| 1542 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1543 |
|
|
|
|
|
|
|
| 1544 |
# ===============================================================================
|
| 1545 |
# MAIN APPLICATION ENTRY POINT
|
| 1546 |
# ===============================================================================
|
|
|
|
| 1016 |
# FIXED APPLICATION WITH UNIQUE WIDGET KEYS
|
| 1017 |
# ===============================================================================
|
| 1018 |
|
| 1019 |
+
# ===============================================================================
|
| 1020 |
+
# FIXED APPLICATION WITH DYNAMIC UNIQUE KEYS AND SESSION STATE
|
| 1021 |
+
# ===============================================================================
|
| 1022 |
+
|
| 1023 |
+
import streamlit as st
|
| 1024 |
+
import pandas as pd
|
| 1025 |
+
import plotly.express as px
|
| 1026 |
+
import json
|
| 1027 |
+
from datetime import datetime
|
| 1028 |
+
import os
|
| 1029 |
+
import uuid
|
| 1030 |
+
|
| 1031 |
+
# Assign a short random identifier once per Streamlit session. The guard means
# the value stored in st.session_state is kept across script reruns rather
# than regenerated each time.
if "session_id" not in st.session_state:
    st.session_state.session_id = uuid.uuid4().hex[:8]
|
| 1034 |
+
|
| 1035 |
def create_huggingface_app():
|
| 1036 |
"""Main Streamlit application optimized for Hugging Face Spaces"""
|
| 1037 |
|
| 1038 |
+
# Get unique session ID
|
| 1039 |
+
session_id = st.session_state.session_id
|
| 1040 |
+
|
| 1041 |
# Custom CSS for better UI
|
| 1042 |
st.markdown("""
|
| 1043 |
<style>
|
|
|
|
| 1059 |
.status-ok { color: #28a745; font-weight: bold; }
|
| 1060 |
.status-warning { color: #ffc107; font-weight: bold; }
|
| 1061 |
.status-error { color: #dc3545; font-weight: bold; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1062 |
</style>
|
| 1063 |
""", unsafe_allow_html=True)
|
| 1064 |
|
|
|
|
| 1075 |
# Initialize processor
|
| 1076 |
if 'hf_processor' not in st.session_state:
|
| 1077 |
with st.spinner("π§ Initializing AI Invoice Processor..."):
|
| 1078 |
+
try:
|
| 1079 |
+
from enhanced_invoice_system_part1 import (
|
| 1080 |
+
HuggingFaceInvoiceProcessor, HF_CONFIG
|
| 1081 |
+
)
|
| 1082 |
+
st.session_state.hf_processor = HuggingFaceInvoiceProcessor()
|
| 1083 |
+
st.session_state.hf_chatbot = HuggingFaceChatBot(st.session_state.hf_processor)
|
| 1084 |
+
st.session_state.chat_history = []
|
| 1085 |
+
st.success("β
System initialized successfully!")
|
| 1086 |
+
except Exception as e:
|
| 1087 |
+
st.error(f"β Initialization failed: {e}")
|
| 1088 |
+
st.stop()
|
| 1089 |
|
| 1090 |
# Sidebar with system status
|
| 1091 |
with st.sidebar:
|
| 1092 |
st.header("ποΈ System Status")
|
| 1093 |
|
|
|
|
| 1094 |
processor = st.session_state.hf_processor
|
| 1095 |
|
| 1096 |
+
# Document processing status
|
| 1097 |
+
if hasattr(processor, 'document_processor') and processor.document_processor.processors:
|
| 1098 |
st.markdown('<span class="status-ok">β
Document Processing</span>', unsafe_allow_html=True)
|
| 1099 |
else:
|
| 1100 |
st.markdown('<span class="status-error">β Document Processing</span>', unsafe_allow_html=True)
|
| 1101 |
|
| 1102 |
+
# AI extraction status
|
| 1103 |
+
if hasattr(processor, 'ai_extractor') and processor.ai_extractor.use_transformers:
|
| 1104 |
st.markdown('<span class="status-ok">β
AI Extraction</span>', unsafe_allow_html=True)
|
| 1105 |
else:
|
| 1106 |
st.markdown('<span class="status-warning">β οΈ Regex Extraction</span>', unsafe_allow_html=True)
|
| 1107 |
|
| 1108 |
+
# Vector search status
|
| 1109 |
+
if hasattr(processor, 'vector_store') and processor.vector_store and processor.vector_store.embedding_model:
|
| 1110 |
st.markdown('<span class="status-ok">β
Semantic Search</span>', unsafe_allow_html=True)
|
| 1111 |
else:
|
| 1112 |
st.markdown('<span class="status-warning">β οΈ Keyword Search Only</span>', unsafe_allow_html=True)
|
|
|
|
| 1120 |
|
| 1121 |
st.metric("Total Invoices", total_invoices)
|
| 1122 |
st.metric("Total Value", f"βΉ{total_amount:,.2f}")
|
| 1123 |
+
|
| 1124 |
+
if hasattr(processor, 'processing_stats'):
|
| 1125 |
+
success_rate = f"{processor.processing_stats['successful']}/{processor.processing_stats['total_processed']}"
|
| 1126 |
+
st.metric("Success Rate", success_rate)
|
| 1127 |
|
| 1128 |
except Exception as e:
|
| 1129 |
st.error(f"Stats error: {e}")
|
| 1130 |
|
| 1131 |
# Processing info
|
| 1132 |
+
st.header("βοΈ System Info")
|
| 1133 |
st.info(f"""
|
| 1134 |
+
**Session ID:** {session_id}
|
| 1135 |
+
|
| 1136 |
+
**Limits:**
|
| 1137 |
+
β’ Max file size: 10MB
|
| 1138 |
+
β’ Max concurrent files: 3
|
| 1139 |
+
β’ Timeout: 30s
|
| 1140 |
""")
|
| 1141 |
|
| 1142 |
+
# Main navigation
|
| 1143 |
+
selected_tab = st.radio(
|
| 1144 |
+
"Choose a section:",
|
| 1145 |
+
["π€ Upload & Process", "π¬ AI Chat", "π Analytics", "π Data Explorer"],
|
| 1146 |
+
horizontal=True,
|
| 1147 |
+
key=f"main_navigation_{session_id}"
|
| 1148 |
+
)
|
| 1149 |
|
| 1150 |
# -------------------------------------------------------------------------
|
| 1151 |
+
# UPLOAD & PROCESS SECTION
|
| 1152 |
# -------------------------------------------------------------------------
|
| 1153 |
|
| 1154 |
+
if selected_tab == "π€ Upload & Process":
|
| 1155 |
st.header("π€ Upload Invoice Documents")
|
| 1156 |
|
| 1157 |
# Feature highlights
|
|
|
|
| 1181 |
</div>
|
| 1182 |
""", unsafe_allow_html=True)
|
| 1183 |
|
| 1184 |
+
# File upload interface
|
| 1185 |
st.markdown("### π Upload Your Invoices")
|
| 1186 |
|
| 1187 |
+
# Use timestamp to ensure unique keys
|
| 1188 |
+
timestamp = datetime.now().strftime("%H%M%S")
|
| 1189 |
+
|
| 1190 |
uploaded_files = st.file_uploader(
|
| 1191 |
"Choose invoice files (PDF, TXT supported)",
|
| 1192 |
type=['pdf', 'txt'],
|
| 1193 |
accept_multiple_files=True,
|
| 1194 |
+
help="Maximum file size: 10MB per file",
|
| 1195 |
+
key=f"file_uploader_{session_id}_{timestamp}"
|
| 1196 |
)
|
| 1197 |
|
| 1198 |
if uploaded_files:
|
| 1199 |
+
max_files = 3
|
| 1200 |
+
if len(uploaded_files) > max_files:
|
| 1201 |
+
st.warning(f"β οΈ Too many files selected. Processing first {max_files} files.")
|
| 1202 |
+
uploaded_files = uploaded_files[:max_files]
|
| 1203 |
|
| 1204 |
st.info(f"π {len(uploaded_files)} files selected")
|
| 1205 |
|
| 1206 |
+
if st.button("π Process Files", type="primary", key=f"process_btn_{session_id}_{timestamp}"):
|
| 1207 |
+
process_files(uploaded_files, session_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1208 |
|
| 1209 |
# -------------------------------------------------------------------------
|
| 1210 |
+
# AI CHAT SECTION
|
| 1211 |
# -------------------------------------------------------------------------
|
| 1212 |
|
| 1213 |
+
elif selected_tab == "π¬ AI Chat":
|
| 1214 |
st.header("π¬ AI Chat Interface")
|
| 1215 |
|
| 1216 |
+
# Display chat history
|
| 1217 |
if st.session_state.chat_history:
|
| 1218 |
st.markdown("### π¬ Chat History")
|
| 1219 |
+
for i, message in enumerate(st.session_state.chat_history):
|
| 1220 |
+
with st.chat_message(message["role"]):
|
| 1221 |
+
st.markdown(message["content"])
|
| 1222 |
+
|
| 1223 |
+
# Chat input area
|
| 1224 |
+
st.markdown("### βοΈ Ask a Question")
|
| 1225 |
+
|
| 1226 |
+
col1, col2 = st.columns([4, 1])
|
| 1227 |
+
|
| 1228 |
+
with col1:
|
| 1229 |
+
user_input = st.text_input(
|
| 1230 |
+
"Type your question:",
|
| 1231 |
+
placeholder="e.g., 'show me total spending'",
|
| 1232 |
+
key=f"chat_input_{session_id}"
|
| 1233 |
+
)
|
| 1234 |
+
|
| 1235 |
+
with col2:
|
| 1236 |
+
ask_btn = st.button("π Ask", type="primary", key=f"ask_btn_{session_id}")
|
| 1237 |
+
|
| 1238 |
+
if ask_btn and user_input:
|
| 1239 |
+
handle_chat_query(user_input)
|
| 1240 |
|
| 1241 |
+
# Suggested queries
|
| 1242 |
if not st.session_state.chat_history:
|
| 1243 |
st.markdown("### π‘ Try These Queries")
|
| 1244 |
|
|
|
|
| 1246 |
|
| 1247 |
with col1:
|
| 1248 |
st.markdown("**π Basic Queries:**")
|
| 1249 |
+
basic_queries = [
|
| 1250 |
"Show me a summary of all invoices",
|
| 1251 |
"How much have we spent in total?",
|
| 1252 |
"Who are our top suppliers?",
|
| 1253 |
"Find invoices with high amounts"
|
| 1254 |
]
|
| 1255 |
+
for i, query in enumerate(basic_queries):
|
| 1256 |
+
if st.button(query, key=f"basic_{session_id}_{i}"):
|
| 1257 |
+
handle_chat_query(query)
|
|
|
|
|
|
|
|
|
|
| 1258 |
|
| 1259 |
with col2:
|
| 1260 |
+
st.markdown("**π Advanced Queries:**")
|
| 1261 |
+
advanced_queries = [
|
| 1262 |
+
"Find technology purchases",
|
| 1263 |
+
"Show office supplies",
|
| 1264 |
+
"Search consulting services",
|
| 1265 |
+
"Recent high-value invoices"
|
| 1266 |
+
]
|
| 1267 |
+
for i, query in enumerate(advanced_queries):
|
| 1268 |
+
if st.button(query, key=f"advanced_{session_id}_{i}"):
|
| 1269 |
+
handle_chat_query(query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1270 |
|
| 1271 |
+
# Clear chat
|
| 1272 |
if st.session_state.chat_history:
|
| 1273 |
+
if st.button("ποΈ Clear Chat", key=f"clear_chat_{session_id}"):
|
| 1274 |
st.session_state.chat_history = []
|
| 1275 |
st.rerun()
|
| 1276 |
|
| 1277 |
# -------------------------------------------------------------------------
|
| 1278 |
+
# ANALYTICS SECTION
|
| 1279 |
# -------------------------------------------------------------------------
|
| 1280 |
|
| 1281 |
+
elif selected_tab == "π Analytics":
|
| 1282 |
st.header("π Analytics Dashboard")
|
| 1283 |
|
| 1284 |
try:
|
|
|
|
| 1323 |
title="Invoice Amount Distribution",
|
| 1324 |
labels={'amount': 'Amount (βΉ)', 'count': 'Number of Invoices'}
|
| 1325 |
)
|
| 1326 |
+
st.plotly_chart(fig_hist, use_container_width=True)
|
| 1327 |
|
| 1328 |
# Top suppliers
|
| 1329 |
if df['supplier_name'].notna().any():
|
| 1330 |
supplier_amounts = df.groupby('supplier_name')['amount'].sum().sort_values(ascending=False).head(10)
|
| 1331 |
|
| 1332 |
+
if len(supplier_amounts) > 0:
|
| 1333 |
+
fig_suppliers = px.bar(
|
| 1334 |
+
x=supplier_amounts.values,
|
| 1335 |
+
y=supplier_amounts.index,
|
| 1336 |
+
orientation='h',
|
| 1337 |
+
title="Top 10 Suppliers by Total Amount",
|
| 1338 |
+
labels={'x': 'Total Amount (βΉ)', 'y': 'Supplier'}
|
| 1339 |
+
)
|
| 1340 |
+
st.plotly_chart(fig_suppliers, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1341 |
|
| 1342 |
except Exception as e:
|
| 1343 |
st.error(f"Analytics error: {e}")
|
| 1344 |
|
| 1345 |
# -------------------------------------------------------------------------
|
| 1346 |
+
# DATA EXPLORER SECTION
|
| 1347 |
# -------------------------------------------------------------------------
|
| 1348 |
|
| 1349 |
+
elif selected_tab == "π Data Explorer":
|
| 1350 |
st.header("π Data Explorer")
|
| 1351 |
|
| 1352 |
try:
|
|
|
|
| 1357 |
st.info("π No data available. Upload some invoices first.")
|
| 1358 |
return
|
| 1359 |
|
| 1360 |
+
# Convert to DataFrame
|
| 1361 |
df_data = []
|
| 1362 |
for inv in invoices:
|
| 1363 |
df_data.append({
|
|
|
|
| 1374 |
|
| 1375 |
df = pd.DataFrame(df_data)
|
| 1376 |
|
| 1377 |
+
# Filters
|
| 1378 |
col1, col2, col3 = st.columns(3)
|
| 1379 |
|
| 1380 |
with col1:
|
| 1381 |
suppliers = ['All'] + sorted(df['Supplier'].dropna().unique().tolist())
|
| 1382 |
+
selected_supplier = st.selectbox("Filter by Supplier", suppliers, key=f"supplier_filter_{session_id}")
|
| 1383 |
|
| 1384 |
with col2:
|
| 1385 |
methods = ['All'] + sorted(df['Method'].dropna().unique().tolist())
|
| 1386 |
+
selected_method = st.selectbox("Filter by Method", methods, key=f"method_filter_{session_id}")
|
| 1387 |
|
| 1388 |
with col3:
|
| 1389 |
+
min_amount = st.number_input("Min Amount", min_value=0.0, value=0.0, key=f"amount_filter_{session_id}")
|
| 1390 |
|
| 1391 |
# Apply filters
|
| 1392 |
filtered_df = df.copy()
|
|
|
|
| 1397 |
if min_amount > 0:
|
| 1398 |
filtered_df = filtered_df[filtered_df['Amount'] >= min_amount]
|
| 1399 |
|
| 1400 |
+
# Display data
|
| 1401 |
st.dataframe(
|
| 1402 |
filtered_df,
|
| 1403 |
use_container_width=True,
|
| 1404 |
column_config={
|
| 1405 |
"Amount": st.column_config.NumberColumn("Amount", format="βΉ%.2f"),
|
| 1406 |
"Confidence": st.column_config.ProgressColumn("Confidence", min_value=0, max_value=1)
|
| 1407 |
+
}
|
|
|
|
| 1408 |
)
|
| 1409 |
|
| 1410 |
+
# Export options
|
| 1411 |
col1, col2 = st.columns(2)
|
| 1412 |
|
| 1413 |
with col1:
|
| 1414 |
+
if st.button("π₯ Export CSV", key=f"export_csv_{session_id}"):
|
| 1415 |
csv_data = filtered_df.to_csv(index=False)
|
| 1416 |
st.download_button(
|
| 1417 |
"Download CSV",
|
| 1418 |
csv_data,
|
| 1419 |
f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.csv",
|
| 1420 |
"text/csv",
|
| 1421 |
+
key=f"download_csv_{session_id}"
|
| 1422 |
)
|
| 1423 |
|
| 1424 |
with col2:
|
| 1425 |
+
if st.button("π Export JSON", key=f"export_json_{session_id}"):
|
| 1426 |
filtered_invoices = [inv for inv in invoices
|
| 1427 |
if inv.get('invoice_number') in filtered_df['Invoice Number'].values]
|
| 1428 |
|
|
|
|
| 1437 |
json.dumps(export_data, indent=2),
|
| 1438 |
f"invoices_{datetime.now().strftime('%Y%m%d_%H%M')}.json",
|
| 1439 |
"application/json",
|
| 1440 |
+
key=f"download_json_{session_id}"
|
| 1441 |
)
|
| 1442 |
|
| 1443 |
except Exception as e:
|
| 1444 |
st.error(f"Data explorer error: {e}")
|
| 1445 |
|
| 1446 |
# -------------------------------------------------------------------------
|
| 1447 |
+
# GLOBAL CHAT INPUT (Outside sections)
|
| 1448 |
# -------------------------------------------------------------------------
|
| 1449 |
|
|
|
|
| 1450 |
st.markdown("---")
|
| 1451 |
+
st.markdown("### π¬ Quick Chat (Works from any section)")
|
| 1452 |
+
|
| 1453 |
+
# Global chat input with unique key
|
| 1454 |
+
global_query = st.chat_input("Ask about your invoices...", key=f"global_chat_{session_id}")
|
| 1455 |
|
| 1456 |
+
if global_query:
|
| 1457 |
+
handle_chat_query(global_query, show_response=True)
|
| 1458 |
+
|
| 1459 |
+
# Footer
|
| 1460 |
+
st.markdown("---")
|
| 1461 |
+
st.markdown("""
|
| 1462 |
+
<div style="text-align: center; color: #666;">
|
| 1463 |
+
<p>π <strong>AI Invoice Processing System</strong> - Optimized for Hugging Face Spaces</p>
|
| 1464 |
+
<p>Built with β€οΈ using Streamlit, Transformers, and AI</p>
|
| 1465 |
+
</div>
|
| 1466 |
+
""", unsafe_allow_html=True)
|
| 1467 |
+
|
| 1468 |
+
# ===============================================================================
|
| 1469 |
+
# HELPER FUNCTIONS
|
| 1470 |
+
# ===============================================================================
|
| 1471 |
+
|
| 1472 |
+
def process_files(uploaded_files, session_id):
    """Run each uploaded file through the invoice processor and report results.

    Every file is passed to
    ``st.session_state.hf_processor.process_uploaded_file``. A result with a
    truthy ``invoice_number`` is rendered in an expander; any other result is
    reported as a warning; an exception is reported as an error. Each file is
    counted exactly once, as either successful or failed.

    Args:
        uploaded_files: Sequence of uploaded file objects exposing ``.name``.
        session_id: Session identifier. Accepted for call compatibility; it is
            not used inside this function.
    """
    progress_bar = st.progress(0)
    status_container = st.container()
    results_container = st.container()

    successful = 0
    failed = 0
    total_files = len(uploaded_files)

    for i, uploaded_file in enumerate(uploaded_files):
        progress_bar.progress((i + 1) / total_files)

        with status_container:
            st.info(f"Processing: {uploaded_file.name}")

        try:
            result = st.session_state.hf_processor.process_uploaded_file(uploaded_file)
            extracted = bool(result and result.invoice_number)

            with results_container:
                if extracted:
                    with st.expander(f"✅ {uploaded_file.name}", expanded=False):
                        col1, col2 = st.columns(2)
                        with col1:
                            st.write(f"**Invoice #:** {result.invoice_number}")
                            st.write(f"**Supplier:** {result.supplier_name}")
                            st.write(f"**Amount:** ₹{result.amount:.2f}")
                        with col2:
                            st.write(f"**Date:** {result.date}")
                            st.write(f"**Method:** {result.processing_method}")
                            st.write(f"**Confidence:** {result.extraction_confidence:.1%}")
                else:
                    st.warning(f"⚠️ Could not extract data from {uploaded_file.name}")

        except Exception as e:
            # Top-level UI boundary: report the failure and continue with the
            # remaining files instead of aborting the whole batch.
            failed += 1
            with results_container:
                st.error(f"❌ Error processing {uploaded_file.name}: {str(e)[:100]}")
        else:
            # Count only after rendering finished, so a file whose details
            # fail to format is not tallied as both successful and failed.
            if extracted:
                successful += 1
            else:
                failed += 1

    # Final status
    with status_container:
        st.success(f"✅ Processing complete! {successful} successful, {failed} failed")

    if successful > 0:
        st.balloons()
|
| 1519 |
+
|
| 1520 |
+
def handle_chat_query(query, show_response=False):
    """Record a chat query, obtain the chatbot's answer, and store both.

    The user message and the assistant reply are appended to
    ``st.session_state.chat_history`` as dicts with ``role``, ``content`` and
    ``timestamp`` keys.

    Args:
        query: The question text to send to
            ``st.session_state.hf_chatbot.query_database``.
        show_response: When true, render the reply inline and do not trigger a
            rerun, so the rendered reply stays on screen. When false, rerun
            the script after the history has been updated.
    """
    # Add user message
    st.session_state.chat_history.append({
        "role": "user",
        "content": query,
        "timestamp": datetime.now()
    })

    # Get AI response; only the chatbot call is guarded so unrelated errors
    # are not reported as chat errors.
    try:
        with st.spinner("🤖 AI is analyzing..."):
            response = st.session_state.hf_chatbot.query_database(query)
    except Exception as e:
        st.error(f"Chat error: {e}")
        return

    st.session_state.chat_history.append({
        "role": "assistant",
        "content": response,
        "timestamp": datetime.now()
    })

    if show_response:
        # Rerunning here would end this script run and discard the elements
        # rendered below before the user could read them.
        with st.chat_message("assistant"):
            st.markdown(response)
        st.info("💡 Switch to the 'AI Chat' section to see full conversation history!")
        return

    st.rerun()
|
| 1550 |
+
|
| 1551 |
+
# ===============================================================================
|
| 1552 |
+
# MAIN ENTRY POINT
|
| 1553 |
+
# ===============================================================================
|
| 1554 |
+
|
| 1555 |
+
def main():
    """Launch the Streamlit app and surface start-up failures in the UI.

    Imports ``IS_HF_SPACE`` from ``enhanced_invoice_system_part1``, shows a
    sidebar notice when it is truthy, then calls ``create_huggingface_app``.
    An ``ImportError`` or any other ``Exception`` raised along the way is
    rendered with ``st.error`` instead of propagating.
    """
    try:
        # Imported inside the try so a missing module is reported below.
        from enhanced_invoice_system_part1 import IS_HF_SPACE

        # Display environment info
        running_on_spaces = IS_HF_SPACE
        if running_on_spaces:
            st.sidebar.info("π€ Running on Hugging Face Spaces")

        # Build and render the application
        create_huggingface_app()

    except ImportError as import_err:
        import_message = f"""
## π¨ Import Error

Missing required modules: {import_err}

Please ensure all files are uploaded to your Hugging Face Space:
- enhanced_invoice_system_part1.py
- enhanced_invoice_system_part2.py (this file)
"""
        st.error(import_message)

    except Exception as app_err:
        app_message = f"""
## π¨ Application Error

{app_err}

Please refresh the page or check the logs for more details.
"""
        st.error(app_message)


if __name__ == "__main__":
    main()
|
| 1590 |
# ===============================================================================
|
| 1591 |
# MAIN APPLICATION ENTRY POINT
|
| 1592 |
# ===============================================================================
|