import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go import numpy as np from datetime import datetime, timedelta import os import io # ========================= # PAGE CONFIG # ========================= st.set_page_config( page_title="OCR Batch Intelligence Dashboard", page_icon="📊", layout="wide", initial_sidebar_state="expanded" ) # ========================= # DISCO MAPPING # ========================= DISCO_MAP = { "11": "LESCO", "12": "GEPCO", "13": "FESCO", "14": "IESCO", "15": "MEPCO", "26": "PESCO", "27": "HAZECO", "37": "HESCO", "38": "SEPCO", "48": "QESCO", "59": "TESCO" } # ========================= # POWER BI MODERN STYLING # ========================= st.markdown(""" """, unsafe_allow_html=True) # ========================= # DATA LOADING FUNCTIONS # ========================= @st.cache_data(ttl=300, show_spinner=False) def load_data_from_file(uploaded_file): """Load data from uploaded file and add processing timestamp""" try: file_extension = uploaded_file.name.split('.')[-1].lower() if file_extension == 'parquet': df = pd.read_parquet(uploaded_file) st.sidebar.success(f"✅ Loaded {len(df):,} records from Parquet file") elif file_extension == 'feather': df = pd.read_feather(uploaded_file) st.sidebar.success(f"✅ Loaded {len(df):,} records from Feather file") elif file_extension == 'csv': df = pd.read_csv(uploaded_file) st.sidebar.success(f"✅ Loaded {len(df):,} records from CSV file") elif file_extension in ['xlsx', 'xls']: df = pd.read_excel(uploaded_file) st.sidebar.success(f"✅ Loaded {len(df):,} records from Excel file") else: st.sidebar.error(f"❌ Unsupported file format: {file_extension}") return None # Check required columns required_columns = ['REF_DIGITS', 'DISCO_CODE', 'BILMONTH', 'IMAGE_VERIFY_CODE_PITC'] missing_columns = [col for col in required_columns if col not in df.columns] if missing_columns: st.sidebar.error(f"❌ Missing required columns: {missing_columns}") return None # ========================= # NEW: ADD PROCESSING TIMESTAMP # ========================= # Generate a unique batch timestamp for this upload current_time = datetime.now() # Check if PROCESSING_TIMESTAMP already exists if 'PROCESSING_TIMESTAMP' not in df.columns: # If timestamp doesn't exist, add it for all records df['PROCESSING_TIMESTAMP'] = current_time df['PROCESSING_DATE'] = current_time.date() df['PROCESSING_HOUR'] = current_time.hour # Store original upload time df['UPLOAD_TIMESTAMP'] = current_time st.session_state['last_upload_time'] = current_time else: # Convert existing timestamp to datetime if needed if not pd.api.types.is_datetime64_any_dtype(df["PROCESSING_TIMESTAMP"]): df["PROCESSING_TIMESTAMP"] = pd.to_datetime(df["PROCESSING_TIMESTAMP"]) # Extract date and hour df['PROCESSING_DATE'] = df['PROCESSING_TIMESTAMP'].dt.date df['PROCESSING_HOUR'] = df['PROCESSING_TIMESTAMP'].dt.hour # Store the latest processing time latest_time = df['PROCESSING_TIMESTAMP'].max() st.session_state['last_upload_time'] = latest_time # Process the data df["DISCO_NAME"] = df["DISCO_CODE"].map(DISCO_MAP).fillna("UNKNOWN") df["BATCH_NO"] = df["REF_DIGITS"].str[:2] df["SUB_DIV"] = df["REF_DIGITS"].str[:5] df["BATCH_ID"] = df["BATCH_NO"] + "-" + df["DISCO_CODE"] df["PROCESSING_STATUS"] = df["IMAGE_VERIFY_CODE_PITC"].apply( lambda x: "Processed" if pd.notna(x) else "Pending" ) df["ACCURACY_CATEGORY"] = df["IMAGE_VERIFY_CODE_PITC"].apply( lambda x: "Success (A,C,D)" if x in ['A', 'C', 'D'] else "Images Not Available (E)" if x == 'E' else "Reading Not Matched (N)" if x == 'N' else "Not Processed" if pd.isna(x) else "Other" ) # Convert BILMONTH to datetime if needed if not pd.api.types.is_datetime64_any_dtype(df["BILMONTH"]): df["BILMONTH"] = pd.to_datetime(df["BILMONTH"]) return df except Exception as e: st.sidebar.error(f"❌ Error loading file: {str(e)}") return None def create_sample_data(): """Create sample data for demonstration with timestamps""" sample_size = 5000 codes = list(DISCO_MAP.keys())[:5] data = { 'REF_DIGITS': [], 'DISCO_CODE': [], 'BILMONTH': [], 'IMAGE_VERIFY_CODE_PITC': [] } for i in range(sample_size): disco = np.random.choice(codes) batch_no = f"{np.random.randint(1, 50):02d}" sub_div = f"{batch_no}{disco}{np.random.randint(1, 5)}" ref_digits = f"{sub_div}{np.random.randint(100000000, 999999999)}" data['REF_DIGITS'].append(ref_digits) data['DISCO_CODE'].append(disco) data['BILMONTH'].append(np.random.choice(pd.date_range('2024-01-01', periods=6, freq='MS'))) data['IMAGE_VERIFY_CODE_PITC'].append(np.random.choice(['A', 'C', 'D', 'E', 'N', None], p=[0.35, 0.2, 0.15, 0.1, 0.05, 0.15])) df = pd.DataFrame(data) # ========================= # ADD TIMESTAMPS TO SAMPLE DATA # ========================= # Simulate records processed at different times current_time = datetime.now() # Create varying timestamps for the last 7 days time_stamps = [] for i in range(len(df)): # 70% processed today, 20% yesterday, 10% older rand = np.random.random() if rand < 0.7: # Today with varying hours hour = np.random.randint(0, 24) delta = timedelta(hours=hour, minutes=np.random.randint(0, 60)) time_stamps.append(current_time - delta) elif rand < 0.9: # Yesterday delta = timedelta(days=1, hours=np.random.randint(0, 24)) time_stamps.append(current_time - delta) else: # 2-7 days ago days = np.random.randint(2, 8) delta = timedelta(days=days, hours=np.random.randint(0, 24)) time_stamps.append(current_time - delta) df['PROCESSING_TIMESTAMP'] = time_stamps df['PROCESSING_DATE'] = pd.to_datetime(df['PROCESSING_TIMESTAMP']).dt.date df['PROCESSING_HOUR'] = pd.to_datetime(df['PROCESSING_TIMESTAMP']).dt.hour df['UPLOAD_TIMESTAMP'] = current_time # Process the data df["DISCO_NAME"] = df["DISCO_CODE"].map(DISCO_MAP) df["BATCH_NO"] = df["REF_DIGITS"].str[:2] df["SUB_DIV"] = df["REF_DIGITS"].str[:5] df["BATCH_ID"] = df["BATCH_NO"] + "-" + df["DISCO_CODE"] df["PROCESSING_STATUS"] = df["IMAGE_VERIFY_CODE_PITC"].apply( lambda x: "Processed" if pd.notna(x) else "Pending" ) df["ACCURACY_CATEGORY"] = df["IMAGE_VERIFY_CODE_PITC"].apply( lambda x: "Success (A,C,D)" if x in ['A', 'C', 'D'] else "Images Not Available (E)" if x == 'E' else "Reading Not Matched (N)" if x == 'N' else "Not Processed" if pd.isna(x) else "Other" ) # Store the latest timestamp st.session_state['last_upload_time'] = df['PROCESSING_TIMESTAMP'].max() return df # ========================= # HELPER FUNCTIONS # ========================= def get_batch_statistics(df): """Get statistics for all batches""" batch_stats = [] for batch_id in sorted(df["BATCH_ID"].unique()): batch_df = df[df["BATCH_ID"] == batch_id] total_records = len(batch_df) processed_records = batch_df["IMAGE_VERIFY_CODE_PITC"].notna().sum() flag_counts = {} for flag in ['A', 'C', 'D', 'E', 'N']: flag_counts[flag] = (batch_df["IMAGE_VERIFY_CODE_PITC"] == flag).sum() successful = flag_counts['A'] + flag_counts['C'] + flag_counts['D'] total_an = flag_counts['A'] + flag_counts['N'] ocr_accuracy = (flag_counts['A'] / total_an * 100) if total_an > 0 else 0 processing_rate = (processed_records / total_records * 100) if total_records > 0 else 0 success_rate = (successful / processed_records * 100) if processed_records > 0 else 0 # Get latest processing time for this batch latest_processing = batch_df['PROCESSING_TIMESTAMP'].max() if 'PROCESSING_TIMESTAMP' in batch_df.columns else None batch_stats.append({ "Batch ID": batch_id, "Batch No": batch_df["BATCH_NO"].iloc[0], "DISCO": batch_df["DISCO_NAME"].iloc[0], "Total Records": total_records, "Processed": processed_records, "Successful (A,C,D)": successful, "Images Not Available (E)": flag_counts['E'], "Reading Not Matched (N)": flag_counts['N'], "Success Rate (A,C,D)": success_rate, "Processing Rate": processing_rate, "OCR Model Accuracy (A vs N)": ocr_accuracy, "Flag A": flag_counts['A'], "Flag C": flag_counts['C'], "Flag D": flag_counts['D'], "Flag E": flag_counts['E'], "Flag N": flag_counts['N'], "Latest Processing Time": latest_processing, }) return pd.DataFrame(batch_stats) def calculate_ocr_model_accuracy(df): """Calculate OCR model accuracy based on A vs N flags only""" count_a = (df["IMAGE_VERIFY_CODE_PITC"] == 'A').sum() count_n = (df["IMAGE_VERIFY_CODE_PITC"] == 'N').sum() total_an = count_a + count_n if total_an > 0: accuracy = (count_a / total_an * 100) else: accuracy = 0 return { "count_a": count_a, "count_n": count_n, "total_an": total_an, "accuracy": accuracy } def get_recently_processed(df, hours=24): """Get records processed in the last N hours""" if 'PROCESSING_TIMESTAMP' not in df.columns: return pd.DataFrame() cutoff_time = datetime.now() - timedelta(hours=hours) recent_df = df[df['PROCESSING_TIMESTAMP'] > cutoff_time].copy() return recent_df def get_processing_trend(df): """Get processing trend by date/hour""" if 'PROCESSING_DATE' not in df.columns: return pd.DataFrame() # Group by processing date daily_trend = df.groupby('PROCESSING_DATE').size().reset_index(name='Records_Processed') daily_trend = daily_trend.sort_values('PROCESSING_DATE') # Add running total daily_trend['Cumulative_Total'] = daily_trend['Records_Processed'].cumsum() return daily_trend # ========================= # SIDEBAR # ========================= with st.sidebar: st.markdown('
', unsafe_allow_html=True) st.markdown("### 📂 DATA SOURCE") data_source = st.radio( "Select data source:", ["Upload File", "Sample Data"], index=0 ) df = None if data_source == "Upload File": uploaded_file = st.file_uploader( "Choose a file", type=['parquet', 'feather', 'csv', 'xlsx', 'xls'], help="Upload Parquet, Feather, CSV, or Excel file" ) if uploaded_file is not None: with st.spinner(f"Loading {uploaded_file.name}..."): df = load_data_from_file(uploaded_file) if df is not None: st.info(f"**File:** {uploaded_file.name}") st.info(f"**Records:** {len(df):,}") st.info(f"**DISCOs:** {df['DISCO_NAME'].nunique()}") # Show timestamp info if 'PROCESSING_TIMESTAMP' in df.columns: latest_time = df['PROCESSING_TIMESTAMP'].max() st.info(f"**Latest Processing:** {latest_time.strftime('%Y-%m-%d %H:%M:%S')}") else: # Sample Data if st.button("Load Sample Data", use_container_width=True): with st.spinner("Creating sample data..."): df = create_sample_data() st.success(f"✅ Loaded {len(df):,} sample records") st.markdown('
', unsafe_allow_html=True) if df is not None: st.markdown('
', unsafe_allow_html=True) st.markdown("### 🎯 ANALYSIS SETTINGS") # DISCO selection selection_mode = st.radio( "Analysis Mode:", ["Single DISCO", "All DISCOS"], index=0 ) if selection_mode == "Single DISCO": disco_choice = st.selectbox( "Select DISCO:", options=sorted(df["DISCO_NAME"].unique().tolist()), index=0 ) disco_code = None for code, name in DISCO_MAP.items(): if name == disco_choice: disco_code = code break else: disco_code = None disco_choice = "ALL DISCOS" # Filters st.markdown("#### ⚙️ FILTERS") date_filter = st.checkbox("Apply Date Filter", value=False) if date_filter: min_date = df["BILMONTH"].min().date() max_date = df["BILMONTH"].max().date() start_date, end_date = st.date_input( "Select Date Range:", value=(min_date, max_date), min_value=min_date, max_value=max_date ) # NEW: Processing Time Filter st.markdown("#### ⏰ PROCESSING TIME") show_recent = st.checkbox("Show only recently processed records", value=False) if show_recent: recent_hours = st.slider( "Last N hours:", min_value=1, max_value=168, value=24, help="Show records processed in the last N hours" ) show_only_processed = st.checkbox("Show only processed records", value=False) # Flag filters selected_flags = st.multiselect( "Filter by Flag:", ["A", "C", "D", "E", "N", "Unprocessed"], default=["A", "C", "D", "E", "N", "Unprocessed"] ) # Batch filter batch_filter = st.text_input( "Filter by Batch ID:", placeholder="e.g., 01-15, 02-15" ) st.markdown('
', unsafe_allow_html=True) # Refresh button if st.button("🔄 Refresh Dashboard", use_container_width=True, type="primary"): st.cache_data.clear() st.rerun() # ========================= # MAIN DASHBOARD # ========================= if df is None: # Show welcome screen st.markdown(f"""

📊 OCR BATCH INTELLIGENCE DASHBOARD

Upload your data file to get started

""", unsafe_allow_html=True) st.markdown('
', unsafe_allow_html=True) st.markdown("### 📋 HOW TO USE") col1, col2 = st.columns(2) with col1: st.markdown("#### 📥 **Step 1: Extract Data**") st.markdown(""" 1. Run the extraction script on your local machine 2. Connect to your Oracle database 3. Export data as **Parquet file** (recommended) 4. The script will create `ocr_data_YYYYMMDD_HHMMSS.parquet` **Add timestamp during extraction:** - Add a timestamp column during Oracle extraction - This helps track when each record was processed """) with col2: st.markdown("#### 📤 **Step 2: Upload Here**") st.markdown(""" 1. Go to sidebar → **Upload File** 2. Select your Parquet file 3. Wait for data to load 4. Start analyzing! **Automatic timestamp:** - If no timestamp exists, one will be added - You can filter by recent processing time - Track new records easily """) st.markdown("#### ⏰ **Time Tracking Features:**") st.markdown(""" ✅ **Processing Timestamp** - When each record was processed ✅ **Recent Records Filter** - View records from last N hours ✅ **Processing Trend** - See daily processing volume ✅ **New Records Alert** - Identify newly added records """) st.markdown('
', unsafe_allow_html=True) st.stop() # ========================= # APPLY FILTERS # ========================= # Apply DISCO filter if disco_code: filtered_df = df[df["DISCO_CODE"] == disco_code].copy() else: filtered_df = df.copy() # Apply date filter if 'date_filter' in locals() and date_filter: filtered_df = filtered_df[ (filtered_df["BILMONTH"] >= pd.Timestamp(start_date)) & (filtered_df["BILMONTH"] <= pd.Timestamp(end_date)) ] # NEW: Apply recent processing filter if 'show_recent' in locals() and show_recent: recent_df = get_recently_processed(filtered_df, hours=recent_hours) if not recent_df.empty: filtered_df = recent_df else: st.warning(f"No records processed in the last {recent_hours} hours") # Apply processed filter if 'show_only_processed' in locals() and show_only_processed: filtered_df = filtered_df[filtered_df["IMAGE_VERIFY_CODE_PITC"].notna()] # Apply flag filter if 'selected_flags' in locals() and selected_flags: if "Unprocessed" in selected_flags: flag_filter = filtered_df["IMAGE_VERIFY_CODE_PITC"].isna() else: flag_filter = filtered_df["IMAGE_VERIFY_CODE_PITC"].isin([f for f in selected_flags if f != "Unprocessed"]) if "Unprocessed" in selected_flags and len(selected_flags) > 1: flag_filter = flag_filter | filtered_df["IMAGE_VERIFY_CODE_PITC"].isin([f for f in selected_flags if f != "Unprocessed"]) filtered_df = filtered_df[flag_filter] # Apply batch filter if 'batch_filter' in locals() and batch_filter: filtered_df = filtered_df[filtered_df["BATCH_ID"].str.contains(batch_filter, case=False, na=False)] # Calculate statistics batch_stats = get_batch_statistics(filtered_df) ocr_accuracy = calculate_ocr_model_accuracy(filtered_df) # ========================= # HEADER WITH TIME INFO # ========================= # Get timestamp info if 'PROCESSING_TIMESTAMP' in filtered_df.columns and not filtered_df.empty: latest_time = filtered_df['PROCESSING_TIMESTAMP'].max() earliest_time = filtered_df['PROCESSING_TIMESTAMP'].min() time_range = f"{earliest_time.strftime('%Y-%m-%d')} to {latest_time.strftime('%Y-%m-%d')}" else: latest_time = "N/A" time_range = "N/A" st.markdown(f"""

📊 OCR BATCH INTELLIGENCE DASHBOARD

Advanced Analytics | Time-Based Tracking | OCR Model Performance

📁 File Data 📅 Current: {datetime.now().strftime('%Y-%m-%d %H:%M')} 🏢 {disco_choice} 📊 {len(filtered_df):,} Records ⏰ Latest: {latest_time.strftime('%Y-%m-%d %H:%M') if isinstance(latest_time, datetime) else 'N/A'}
""", unsafe_allow_html=True) # ========================= # NEW: RECENT PROCESSING SECTION # ========================= st.markdown('
', unsafe_allow_html=True) st.markdown("### ⏰ RECENT PROCESSING ACTIVITY") # Calculate recent records current_time = datetime.now() if 'PROCESSING_TIMESTAMP' in df.columns: # Records from last 24 hours last_24h = get_recently_processed(df, hours=24) last_7d = get_recently_processed(df, hours=168) col1, col2, col3, col4 = st.columns(4) with col1: recent_1h = get_recently_processed(df, hours=1) st.metric( label="Last 1 Hour", value=f"{len(recent_1h):,}", delta=f"{len(recent_1h)} new" if len(recent_1h) > 0 else "0 new" ) with col2: recent_24h = get_recently_processed(df, hours=24) st.metric( label="Last 24 Hours", value=f"{len(recent_24h):,}", delta=f"{len(recent_24h)} processed" ) with col3: recent_7d = get_recently_processed(df, hours=168) st.metric( label="Last 7 Days", value=f"{len(recent_7d):,}", delta=f"{len(recent_7d)} processed" ) with col4: # Calculate processing rate per day if len(recent_7d) > 0: avg_per_day = len(recent_7d) / 7 st.metric( label="Avg/Day (7D)", value=f"{avg_per_day:,.0f}", delta="" ) else: st.metric("Avg/Day (7D)", "0", delta="") # Show processing trend chart processing_trend = get_processing_trend(df) if not processing_trend.empty: fig = px.line( processing_trend, x='PROCESSING_DATE', y='Records_Processed', title='Daily Processing Trend', markers=True, color_discrete_sequence=['#0078D4'] ) fig.update_layout( xaxis_title='Date', yaxis_title='Records Processed', hovermode='x unified' ) st.plotly_chart(fig, use_container_width=True) else: st.info("No processing timestamp data available. Timestamps will be added on next upload.") st.markdown('
', unsafe_allow_html=True) # ========================= # EXECUTIVE KPIs # ========================= st.markdown('
', unsafe_allow_html=True) st.markdown("### 📈 EXECUTIVE DASHBOARD") total_records = len(filtered_df) processed_records = filtered_df["IMAGE_VERIFY_CODE_PITC"].notna().sum() successful_records = filtered_df["IMAGE_VERIFY_CODE_PITC"].isin(['A', 'C', 'D']).sum() processing_rate = (processed_records / total_records * 100) if total_records > 0 else 0 success_rate = (successful_records / processed_records * 100) if processed_records > 0 else 0 kpi_cols = st.columns(5) with kpi_cols[0]: st.markdown(f"""
📊
{total_records:,}
Total Records
{processing_rate:.1f}% processed
""", unsafe_allow_html=True) with kpi_cols[1]: st.markdown(f"""
{success_rate:.1f}%
Success Rate (A,C,D)
{successful_records:,} successful
""", unsafe_allow_html=True) with kpi_cols[2]: st.markdown(f"""
🤖
{ocr_accuracy['accuracy']:.1f}%
OCR Model Accuracy
A vs N only
""", unsafe_allow_html=True) with kpi_cols[3]: perfect_matches = (filtered_df["IMAGE_VERIFY_CODE_PITC"] == 'A').sum() perfect_match_rate = (perfect_matches / processed_records * 100) if processed_records > 0 else 0 st.markdown(f"""
{perfect_match_rate:.1f}%
Perfect Match (A)
{perfect_matches:,} perfect
""", unsafe_allow_html=True) with kpi_cols[4]: image_issues = (filtered_df["IMAGE_VERIFY_CODE_PITC"] == 'E').sum() image_issue_rate = (image_issues / processed_records * 100) if processed_records > 0 else 0 st.markdown(f"""
🖼️
{image_issue_rate:.1f}%
Image Issues (E)
{image_issues:,} records
""", unsafe_allow_html=True) st.markdown('
', unsafe_allow_html=True) # ========================= # FLAG DISTRIBUTION # ========================= st.markdown('
', unsafe_allow_html=True) st.markdown("### 🏷️ FLAG CLASSIFICATION DISTRIBUTION") flag_counts = filtered_df["IMAGE_VERIFY_CODE_PITC"].value_counts() flag_cols = st.columns(5) for idx, flag in enumerate(['A', 'C', 'D', 'E', 'N']): count = flag_counts.get(flag, 0) with flag_cols[idx]: percentage = (count / processed_records * 100) if processed_records > 0 else 0 flag_labels = { 'A': '✅ PERFECT MATCH', 'C': '⚠️ FLAG C', 'D': '⚠️ FLAG D', 'E': '❌ IMAGE ISSUE', 'N': '❌ READING ISSUE' } flag_descs = { 'A': 'Successful with perfect reading (80%+)', 'C': 'Successful with minor issues', 'D': 'Successful but needs review', 'E': 'Image not available', 'N': 'Reading not matched (<80%)' } st.markdown(f"""
{flag}
{count:,}
{flag_labels.get(flag, f'FLAG {flag}')}
{percentage:.1f}% of processed
{flag_descs.get(flag, 'Unknown flag')}
""", unsafe_allow_html=True) st.markdown('
', unsafe_allow_html=True) # ========================= # DATA PREVIEW WITH TIMESTAMP # ========================= st.markdown('
', unsafe_allow_html=True) st.markdown("### 📋 DATA OVERVIEW") tab1, tab2, tab3 = st.tabs(["📊 Records", "📈 Batch Statistics", "⏰ Recent Activity"]) with tab1: display_columns = ["REF_DIGITS", "DISCO_NAME", "BATCH_ID", "IMAGE_VERIFY_CODE_PITC", "ACCURACY_CATEGORY"] # Add timestamp if available if 'PROCESSING_TIMESTAMP' in filtered_df.columns: display_columns.append("PROCESSING_TIMESTAMP") st.dataframe( filtered_df[display_columns].head(100), use_container_width=True, column_config={ "PROCESSING_TIMESTAMP": st.column_config.DatetimeColumn( "Processing Time", format="YYYY-MM-DD HH:mm:ss" ) } if 'PROCESSING_TIMESTAMP' in display_columns else None ) with tab2: if not batch_stats.empty: display_stats = batch_stats[["Batch ID", "DISCO", "Total Records", "Processed", "Success Rate (A,C,D)", "OCR Model Accuracy (A vs N)", "Latest Processing Time"]] st.dataframe(display_stats, use_container_width=True) else: st.info("No batch statistics available") with tab3: # Show recently processed records if 'PROCESSING_TIMESTAMP' in df.columns: recent_records = get_recently_processed(df, hours=24) if not recent_records.empty: st.markdown(f"### 🆕 Records Processed in Last 24 Hours: {len(recent_records):,}") # Group by hour recent_records['PROCESSING_HOUR'] = pd.to_datetime(recent_records['PROCESSING_TIMESTAMP']).dt.floor('H') hourly_counts = recent_records.groupby('PROCESSING_HOUR').size().reset_index(name='Count') hourly_counts = hourly_counts.sort_values('PROCESSING_HOUR', ascending=False) # Show hourly breakdown col1, col2 = st.columns([2, 1]) with col1: st.dataframe( recent_records[["REF_DIGITS", "DISCO_NAME", "BATCH_ID", "IMAGE_VERIFY_CODE_PITC", "PROCESSING_TIMESTAMP"]] .sort_values('PROCESSING_TIMESTAMP', ascending=False) .head(50), use_container_width=True ) with col2: st.markdown("#### 📊 By Hour") st.dataframe(hourly_counts, use_container_width=True) # Show latest batch if not recent_records.empty: latest_batch = recent_records.loc[recent_records['PROCESSING_TIMESTAMP'].idxmax()] st.markdown("#### 🔍 Latest Record") st.write(f"**REF:** {latest_batch['REF_DIGITS']}") st.write(f"**DISCO:** {latest_batch['DISCO_NAME']}") st.write(f"**Batch:** {latest_batch['BATCH_ID']}") st.write(f"**Flag:** {latest_batch['IMAGE_VERIFY_CODE_PITC']}") st.write(f"**Time:** {latest_batch['PROCESSING_TIMESTAMP']}") else: st.info("No records processed in the last 24 hours") else: st.info("No timestamp data available") st.markdown('
', unsafe_allow_html=True) # ========================= # FOOTER WITH ENHANCED TIME INFO # ========================= # Calculate time-based metrics if 'PROCESSING_TIMESTAMP' in filtered_df.columns and not filtered_df.empty: time_metrics = { "latest_time": filtered_df['PROCESSING_TIMESTAMP'].max(), "earliest_time": filtered_df['PROCESSING_TIMESTAMP'].min(), "records_today": len(get_recently_processed(df, hours=24)), "unique_dates": filtered_df['PROCESSING_DATE'].nunique() if 'PROCESSING_DATE' in filtered_df.columns else 0 } else: time_metrics = { "latest_time": "N/A", "earliest_time": "N/A", "records_today": 0, "unique_dates": 0 } st.markdown(f"""
📅 Last Updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
🏢 DISCO: {disco_choice}
📊 Records: {len(filtered_df):,}
Success Rate: {success_rate:.1f}%
🤖 OCR Accuracy: {ocr_accuracy['accuracy']:.1f}%
Latest Processing: {time_metrics['latest_time'].strftime('%Y-%m-%d %H:%M') if isinstance(time_metrics['latest_time'], datetime) else 'N/A'}
🚀 OCR Intelligence Dashboard | Time-Based Tracking | Parquet Format Recommended
""", unsafe_allow_html=True)