import re

import streamlit as st
import pandas as pd
import plotly.express as px
import requests
from io import StringIO
from datetime import datetime

# Page configuration
st.set_page_config(
    page_title="Terror Finance & Maritime Watch",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Custom CSS for styling
# NOTE(review): the stylesheet content was lost in extraction; the call is
# kept so the page structure is unchanged.
st.markdown("""
""", unsafe_allow_html=True)


def extract_imo_from_info(info_text):
    """Extract IMO number from Information field.

    Returns the 7-digit IMO number as a string, or "N/A" when the field is
    empty/NaN or no IMO pattern is present.
    """
    if not info_text or pd.isna(info_text):
        return "N/A"
    # Look for IMO pattern: "IMO-" (dash optional) followed by 7 digits
    imo_match = re.search(r'IMO-?\s*(\d{7})', str(info_text), re.IGNORECASE)
    if imo_match:
        return imo_match.group(1)
    return "N/A"


def extract_mmsi_from_info(info_text):
    """Extract MMSI from Information field.

    Returns the 9-digit MMSI as a string, or "N/A" when absent.
    """
    if not info_text or pd.isna(info_text):
        return "N/A"
    # Look for MMSI pattern: "MMSI-" (dash optional) followed by 9 digits
    mmsi_match = re.search(r'MMSI-?\s*(\d{9})', str(info_text), re.IGNORECASE)
    if mmsi_match:
        return mmsi_match.group(1)
    return "N/A"


def extract_call_sign_from_info(info_text):
    """Extract Call Sign from Information field.

    Returns the alphanumeric call sign, or "N/A" when absent.
    """
    if not info_text or pd.isna(info_text):
        return "N/A"
    # Look for Call Sign pattern
    call_match = re.search(r'Call Sign-?\s*([A-Z0-9]+)', str(info_text), re.IGNORECASE)
    if call_match:
        return call_match.group(1)
    return "N/A"


def extract_owner_from_info(info_text):
    """Extract owner information from Information field.

    Prefers "Registered owner"; falls back to "Commercial manager".
    Returns "N/A" when neither is present.
    """
    if not info_text or pd.isna(info_text):
        return "N/A"
    # Look for Registered owner first, then Commercial manager
    owner_match = re.search(r'Registered owner-?\s*([^,]+)', str(info_text), re.IGNORECASE)
    if owner_match:
        return owner_match.group(1).strip()
    manager_match = re.search(r'Commercial manager-?\s*([^,]+)', str(info_text), re.IGNORECASE)
    if manager_match:
        return manager_match.group(1).strip()
    return "N/A"


def safe_get(row, key, default="N/A"):
    """Safely read a value from a pandas row (Series).

    Handles missing columns (with per-field alternative column names),
    NaN/empty values, and a special composite rule for 'Name'
    (Last_Name + First_Name fallback, then "Entity <DC_ID>").
    Always returns a stripped string or *default*.
    """
    try:
        # Special handling for Name field
        if key == 'Name':
            # Try to get Name column first
            if 'Name' in row.index and pd.notna(row['Name']) and str(row['Name']).strip():
                return str(row['Name']).strip()
            # Fallback: combine Last_Name and First_Name
            last_name = row.get('Last_Name', '') if 'Last_Name' in row.index else ''
            first_name = row.get('First_Name', '') if 'First_Name' in row.index else ''
            # Clean up the names
            last_name = str(last_name).strip() if pd.notna(last_name) else ''
            first_name = str(first_name).strip() if pd.notna(first_name) else ''
            # Combine names
            if last_name and first_name:
                return f"{last_name} {first_name}"
            elif first_name:
                return first_name
            elif last_name:
                return last_name
            else:
                dc_id = row.get('DC_ID', 'Unknown')
                return f"Entity {dc_id}"
        # Try exact key first
        if key in row.index:
            val = row[key]
        else:
            # Try alternative column names for common fields
            alternatives = {
                'DC_ID': ['ID', 'Entity_ID'],
                'Countries': ['Country', 'Location'],
                'Companies': ['Company', 'Business'],
                'Phone#': ['Phone', 'Telephone', 'Contact_Phone'],
                'Linked To': ['Organization', 'Terror_Organization', 'Group'],
                'IMO': ['IMO_Number', 'IMO_No', 'International_Maritime_Organization'],
                'Flag': ['Flag_State', 'Flag_Country'],
                'DWT': ['Deadweight', 'Dead_Weight_Tonnage'],
                'Built_Year': ['Year_Built', 'Construction_Year', 'DOB'],
                'Status': ['Vessel_Status', 'Ship_Status', 'AIS_Status'],
                'Insurance': ['Insurer', 'Insurance_Company']
            }
            val = None
            if key in alternatives:
                for alt_key in alternatives[key]:
                    if alt_key in row.index:
                        val = row[alt_key]
                        break
            if val is None:
                return default
        # Handle None, NaN, empty strings
        if pd.isna(val) or str(val).strip() == '' or str(val).lower() in ['nan', 'none', 'null']:
            return default
        return str(val).strip()
    except (KeyError, TypeError, AttributeError):
        return default


# Load clean datasets from Hugging Face
@st.cache_data
def load_clean_data():
    """Load pre-processed clean datasets.

    Downloads three CSVs from the Hugging Face space and returns a dict of
    DataFrames keyed by 'individuals'/'companies'/'vessels'. A failing
    download yields an empty DataFrame for that category (and a UI error).
    """
    base_url = "https://huggingface.co/spaces/Malaji71/list/resolve/main/"
    datasets = {}
    files_to_load = {
        'individuals': 'individuals_clean.csv',
        'companies': 'companies_clean.csv',
        'vessels': 'vessels_clean.csv'
    }
    for category, filename in files_to_load.items():
        try:
            url = base_url + filename
            # timeout added so a stalled download cannot hang the app
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            # Load CSV from response content
            df = pd.read_csv(StringIO(response.text))
            # Clean up any remaining data issues
            for col in df.columns:
                if df[col].dtype == 'object':
                    df[col] = df[col].fillna('').astype(str)
            datasets[category] = df
        except Exception as e:
            # BUG FIX: the message previously printed a literal "(unknown)"
            # instead of interpolating the file name.
            st.error(f"Error loading {filename}: {str(e)}")
            # Create empty dataframe as fallback
            datasets[category] = pd.DataFrame()
    return datasets


# Create fallback data if CSV loading fails
def create_sample_data():
    """Create sample data if CSV files are not available.

    Returns the same dict-of-DataFrames shape as load_clean_data so the UI
    code can consume either source interchangeably.
    """
    individuals = pd.DataFrame({
        'Name': ['Sample Individual 1', 'Sample Individual 2'],
        'DC_ID': [1, 2],
        'Position': ['Financier', 'Operator'],
        'Countries': ['Lebanon', 'Syria'],
        'Email': ['sample1@email.com', 'sample2@email.com'],
        'Phone#': ['+961-xxx-xxxx', '+963-xxx-xxxx'],
        'Companies': ['Sample Corp', 'Test LLC'],
        'Linked To': ['Hamas', 'Hezbollah'],
        'Auto_Category': ['Individual', 'Individual']
    })
    companies = pd.DataFrame({
        'Name': ['Sample Company 1', 'Sample Company 2'],
        'DC_ID': [101, 102],
        'Sub_Category': ['Shell Company', 'Front Company'],
        'Countries': ['Panama', 'Cyprus'],
        'Owner': ['Unknown', 'Sample Person'],
        'Key_Individuals': ['Person A', 'Person B'],
        'Linked To': ['Hamas', 'Hezbollah'],
        'Auto_Category': ['Company', 'Company']
    })
    vessels = pd.DataFrame({
        'Name': ['Sample Vessel 1', 'Sample Vessel 2'],
        'DC_ID': [201, 202],
        'IMO': ['1234567', '7654321'],
        'Flag': ['Panama', 'Liberia'],
        'Owner': ['Sample Maritime LLC', 'Ocean Holdings'],
        'Status': ['AIS Off', 'Active'],
        'Insurance': ['Unknown', 'Lloyd\'s'],
        'Auto_Category': ['Vessel', 'Vessel']
    })
    return {
        'individuals': individuals,
        'companies': companies,
        'vessels': vessels
    }
# ---- Page header -----------------------------------------------------------
col1, col2, col3 = st.columns([6, 1, 1])
with col1:
    st.markdown("# 🛡️ Terror Finance & Maritime Watch")
    st.markdown("*Powered by Pariente AI - Advanced Intelligence Analytics*")
with col2:
    theme_toggle = st.checkbox("🌙", key="theme")
with col3:
    # NOTE(review): the original HTML badge markup was lost in extraction;
    # only the visible text ("🤖 Pariente AI" / "Intelligence Platform")
    # could be recovered — confirm against the deployed app.
    st.markdown('''
🤖 Pariente AI
Intelligence Platform
''', unsafe_allow_html=True)

# ---- Load data (fall back to bundled samples on any failure) ---------------
try:
    data = load_clean_data()
    # All-empty datasets mean the remote CSVs are missing.
    if all(len(df) == 0 for df in data.values()):
        st.warning("⚠️ Using sample data - CSV files not found. Please upload the processed CSV files.")
        data = create_sample_data()
except Exception as e:
    st.error(f"Error loading data: {str(e)}")
    data = create_sample_data()

# ---- Navigation tabs -------------------------------------------------------
tab1, tab2, tab3, tab4, tab5 = st.tabs(
    ["👥 Individuals", "🏢 Companies", "🚢 Vessels", "📊 Summary", "📋 Data Reports"]
)
# Summary Tab
with tab4:
    st.markdown("## Key Statistics")
    col1, col2, col3, col4 = st.columns(4)
    # NOTE(review): the stat-card HTML wrappers were lost in extraction;
    # only the metric value and caption text are reconstructed here.
    with col1:
        st.markdown(f"""
{len(data['individuals'])}
Individuals Tracked
""", unsafe_allow_html=True)
    with col2:
        st.markdown(f"""
{len(data['companies'])}
Companies Monitored
""", unsafe_allow_html=True)
    with col3:
        st.markdown(f"""
{len(data['vessels'])}
Vessels Documented
""", unsafe_allow_html=True)
    with col4:
        # Calculate unique countries safely; 'Countries' may hold
        # comma-separated lists.
        try:
            all_countries = []
            for df in data.values():
                if 'Countries' in df.columns:
                    countries = df['Countries'].dropna().astype(str)
                    for country_list in countries:
                        if ',' in country_list:
                            all_countries.extend([c.strip() for c in country_list.split(',') if c.strip()])
                        else:
                            all_countries.append(country_list.strip())
            unique_countries = len(set([c for c in all_countries if c and c.lower() != 'nan']))
        except Exception:
            # BUG FIX: was a bare `except:` — narrowed so SystemExit /
            # KeyboardInterrupt are not swallowed.
            unique_countries = 0
        st.markdown(f"""
{unique_countries}
Countries Involved
""", unsafe_allow_html=True)

    # Charts
    st.markdown("## Data Visualization")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("### Organizations")
        try:
            if len(data['individuals']) > 0 and 'Linked To' in data['individuals'].columns:
                org_data = data['individuals']['Linked To'].value_counts().reset_index()
                org_data.columns = ['Organization', 'Count']
                if not org_data.empty:
                    fig = px.pie(org_data, names='Organization', values='Count')
                    fig.update_layout(showlegend=True, height=300)
                    st.plotly_chart(fig, use_container_width=True)
                else:
                    st.write("No organization data available")
            else:
                st.write("No organization data available")
        except Exception:
            st.write("Error creating organization chart")
    with col2:
        st.markdown("### Entity Types")
        type_data = pd.DataFrame({
            'Type': ['Individuals', 'Companies', 'Vessels'],
            'Count': [len(data['individuals']), len(data['companies']), len(data['vessels'])]
        })
        fig = px.bar(type_data, x='Type', y='Count', color_discrete_sequence=['#3182CE'])
        fig.update_layout(showlegend=False, height=300)
        st.plotly_chart(fig, use_container_width=True)
    with col3:
        st.markdown("### Top Countries")
        try:
            if unique_countries > 0:
                # Tally country frequency across every dataset.
                country_counts = {}
                for df in data.values():
                    if 'Countries' in df.columns:
                        countries = df['Countries'].dropna().astype(str)
                        for country_list in countries:
                            if ',' in country_list:
                                for country in country_list.split(','):
                                    country = country.strip()
                                    if country and country.lower() != 'nan':
                                        country_counts[country] = country_counts.get(country, 0) + 1
                            else:
                                country = country_list.strip()
                                if country and country.lower() != 'nan':
                                    country_counts[country] = country_counts.get(country, 0) + 1
                if country_counts:
                    country_data = pd.DataFrame(list(country_counts.items()), columns=['Country', 'Count'])
                    country_data = country_data.sort_values('Count', ascending=False).head(10)
                    fig = px.pie(country_data, names='Country', values='Count')
                    fig.update_layout(showlegend=True, height=300)
                    st.plotly_chart(fig, use_container_width=True)
                else:
                    st.write("No country data available")
            else:
                st.write("No country data available")
        except Exception:
            st.write("Error creating country chart")
# Individuals Tab
with tab1:
    st.markdown("## Individuals Database")

    # Export button sits in a narrow right-hand column.
    col1, col2 = st.columns([6, 1])
    with col2:
        if st.button("📥 Export", key="export_individuals"):
            csv_data = data['individuals'].to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv_data,
                file_name=f"individuals_{datetime.now().strftime('%Y%m%d')}.csv",
                mime="text/csv"
            )

    # Filter controls: free-text name search, country, organization.
    col1, col2, col3 = st.columns([3, 2, 2])
    with col1:
        name_filter = st.text_input("Search by name", placeholder="Enter name...", key="search_individuals")
    with col2:
        country_options = ["All Countries"]
        if len(data['individuals']) > 0 and 'Countries' in data['individuals'].columns:
            # 'Countries' may hold comma-separated lists; flatten to a set.
            all_countries = set()
            for country_list in data['individuals']['Countries'].dropna().astype(str):
                if ',' in country_list:
                    all_countries.update(
                        [c.strip() for c in country_list.split(',') if c.strip() and c.lower() != 'nan']
                    )
                else:
                    if country_list.strip() and country_list.lower() != 'nan':
                        all_countries.add(country_list.strip())
            country_options += sorted(list(all_countries))
        country_filter = st.selectbox("Country", country_options, key="country_individuals")
    with col3:
        org_options = ["All Organizations"]
        if len(data['individuals']) > 0 and 'Linked To' in data['individuals'].columns:
            orgs = data['individuals']['Linked To'].dropna().astype(str)
            org_options += sorted(orgs.unique().tolist())
        org_filter = st.selectbox("Organization", org_options, key="org_individuals")

    # Apply the selected filters one after another.
    filtered_individuals = data['individuals'].copy()
    if name_filter:
        name_mask = filtered_individuals['Name'].str.contains(name_filter, case=False, na=False)
        filtered_individuals = filtered_individuals[name_mask]
    if country_filter != "All Countries":
        country_mask = filtered_individuals['Countries'].str.contains(country_filter, case=False, na=False)
        filtered_individuals = filtered_individuals[country_mask]
    if org_filter != "All Organizations":
        filtered_individuals = filtered_individuals[filtered_individuals['Linked To'] == org_filter]

    # Render one card per matching individual.
    for _, person in filtered_individuals.iterrows():
        org_value = safe_get(person, 'Linked To')
        # org_color feeds the card's CSS tag class (Hamas is highlighted red).
        org_color = "tag-danger" if "Hamas" in org_value else "tag-warning"
        # NOTE(review): the card's HTML markup (which used org_color) was lost
        # in extraction; only the visible field text is reconstructed.
        st.markdown(f"""
{safe_get(person, 'Name')}
ID: {safe_get(person, 'DC_ID')}
Position: {safe_get(person, 'Position')}
Countries: {safe_get(person, 'Countries')}
Email: {safe_get(person, 'Email')}
Phone: {safe_get(person, 'Phone#')}
Companies: {safe_get(person, 'Companies')}
{org_value}
""", unsafe_allow_html=True)
# Companies Tab
with tab2:
    st.markdown("## Companies Database")

    # Export button in the narrow right-hand column.
    col1, col2 = st.columns([6, 1])
    with col2:
        if st.button("📥 Export", key="export_companies"):
            csv_data = data['companies'].to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv_data,
                file_name=f"companies_{datetime.now().strftime('%Y%m%d')}.csv",
                mime="text/csv"
            )

    # Single free-text filter on the company name.
    name_filter = st.text_input("Search companies", placeholder="Enter company name...", key="search_companies")

    filtered_companies = data['companies'].copy()
    if name_filter:
        match_mask = filtered_companies['Name'].str.contains(name_filter, case=False, na=False)
        filtered_companies = filtered_companies[match_mask]

    # Render one card per matching company.
    for _, company in filtered_companies.iterrows():
        org_value = safe_get(company, 'Linked To')
        # org_color feeds the card's CSS tag class (Hamas is highlighted red).
        org_color = "tag-danger" if "Hamas" in org_value else "tag-warning"
        # NOTE(review): the card's HTML markup (which used org_color) was lost
        # in extraction; only the visible field text is reconstructed.
        st.markdown(f"""
{safe_get(company, 'Name')}
ID: {safe_get(company, 'DC_ID')}
Type: {safe_get(company, 'Sub_Category')}
Countries: {safe_get(company, 'Countries')}
Owner: {safe_get(company, 'Owner')}
Key Individuals: {safe_get(company, 'Key_Individuals')}
{org_value}
""", unsafe_allow_html=True)
# Vessels Tab
with tab3:
    st.markdown("## Maritime Vessels Database")

    # Export
    col1, col2 = st.columns([6, 1])
    with col2:
        if st.button("📥 Export", key="export_vessels"):
            csv_data = data['vessels'].to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv_data,
                file_name=f"vessels_{datetime.now().strftime('%Y%m%d')}.csv",
                mime="text/csv"
            )

    # Filters for vessels: text search, flag state, AIS status, organization.
    col1, col2, col3, col4 = st.columns([3, 2, 2, 2])
    with col1:
        name_filter = st.text_input("Search vessels", placeholder="Enter vessel name or IMO...", key="search_vessels")
    with col2:
        flag_options = ["All Flags"]
        if len(data['vessels']) > 0 and 'Flag' in data['vessels'].columns:
            flags = data['vessels']['Flag'].dropna().astype(str).unique()
            flag_options += sorted([f for f in flags if f and f.lower() != 'nan'])
        flag_filter = st.selectbox("Flag State", flag_options, key="flag_vessels")
    with col3:
        status_options = ["All Status"]
        if len(data['vessels']) > 0 and 'Status' in data['vessels'].columns:
            statuses = data['vessels']['Status'].dropna().astype(str).unique()
            status_options += sorted([s for s in statuses if s and s.lower() != 'nan'])
        status_filter = st.selectbox("Status", status_options, key="status_vessels")
    with col4:
        org_options = ["All Organizations"]
        if len(data['vessels']) > 0 and 'Linked To' in data['vessels'].columns:
            orgs = data['vessels']['Linked To'].dropna().astype(str).unique()
            org_options += sorted([o for o in orgs if o and o.lower() != 'nan'])
        org_filter = st.selectbox("Organization", org_options, key="org_vessels")

    # Apply filters
    filtered_vessels = data['vessels'].copy()
    if name_filter:
        def _col_contains(frame, col, needle):
            # Case-insensitive containment mask aligned to the frame's index;
            # all-False when the column is missing.
            if col in frame.columns:
                return frame[col].astype(str).str.contains(needle, case=False, na=False)
            return pd.Series(False, index=frame.index)

        # BUG FIX: the original used frame.get(col, pd.Series()) — the empty
        # fallback Series does not share the frame's index, so OR-ing it into
        # the mask produced NaNs / misaligned results when a column was absent.
        mask = (
            filtered_vessels['Name'].str.contains(name_filter, case=False, na=False)
            | _col_contains(filtered_vessels, 'IMO', name_filter)
            | _col_contains(filtered_vessels, 'Owner', name_filter)
            | _col_contains(filtered_vessels, 'Information', name_filter)
        )
        filtered_vessels = filtered_vessels[mask]
    if flag_filter != "All Flags":
        filtered_vessels = filtered_vessels[
            filtered_vessels['Flag'].str.contains(flag_filter, case=False, na=False)
        ]
    if status_filter != "All Status":
        filtered_vessels = filtered_vessels[filtered_vessels['Status'] == status_filter]
    if org_filter != "All Organizations":
        filtered_vessels = filtered_vessels[filtered_vessels['Linked To'] == org_filter]

    # Display vessels
    for _, vessel in filtered_vessels.iterrows():
        status_value = safe_get(vessel, 'Status')
        # status_class feeds the card's CSS class: 'AIS Off' renders inactive.
        status_class = "status-inactive" if status_value == 'AIS Off' else "status-active"
        # Get vessel-specific information
        imo = safe_get(vessel, 'IMO')
        flag = safe_get(vessel, 'Flag')
        vessel_type = safe_get(vessel, 'Sub_Category')
        owner = safe_get(vessel, 'Owner')
        built_year = safe_get(vessel, 'Built_Year', safe_get(vessel, 'DOB'))  # Sometimes vessel age is in DOB
        dwt = safe_get(vessel, 'DWT')
        # NOTE(review): the card's HTML markup (which used status_class) was
        # lost in extraction; only the visible field text is reconstructed.
        st.markdown(f"""
{safe_get(vessel, 'Name')}
DC_ID: {safe_get(vessel, 'DC_ID')}
IMO Number: {imo}
Vessel Type: {vessel_type}
Flag State: {flag}
Owner: {owner}
Built Year: {built_year}
DWT: {dwt}
Status: {status_value}
Insurance: {safe_get(vessel, 'Insurance')}
Countries: {safe_get(vessel, 'Countries')}
Information: {safe_get(vessel, 'Information')}
""", unsafe_allow_html=True)
# Data Reports Tab
with tab5:
    st.markdown("## 📋 Data Analysis Reports")

    # Load additional analysis files
    try:
        # Load analysis JSON
        analysis_url = "https://huggingface.co/spaces/Malaji71/list/resolve/main/complete_analysis.json"
        analysis_response = requests.get(analysis_url, timeout=30)
        if analysis_response.status_code == 200:
            analysis_data = analysis_response.json()
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("### 📊 Data Quality Metrics")
                if 'complete_analysis' in analysis_data:
                    quality_metrics = analysis_data['complete_analysis'].get('quality_metrics', {})
                    if 'completeness' in quality_metrics:
                        completeness_df = pd.DataFrame(
                            list(quality_metrics['completeness'].items()),
                            columns=['Column', 'Completeness %']
                        ).sort_values('Completeness %', ascending=False)
                        st.dataframe(completeness_df, use_container_width=True)
                    if 'issues' in quality_metrics and quality_metrics['issues']:
                        st.markdown("### ⚠️ Data Issues")
                        for issue in quality_metrics['issues']:
                            st.warning(issue)
            with col2:
                st.markdown("### 🎯 Categorization Analysis")
                if 'complete_analysis' in analysis_data:
                    cat_analysis = analysis_data['complete_analysis'].get('categorization_analysis', {})
                    if 'content_based_categorization' in cat_analysis:
                        cat_counts = cat_analysis['content_based_categorization']
                        for category, count in cat_counts.items():
                            st.metric(category.title(), count)
            st.markdown("### 📈 Processing Statistics")
            processing_stats = {
                'Total Rows Processed': analysis_data.get('complete_analysis', {}).get('total_rows', 0),
                'Total Columns Analyzed': analysis_data.get('complete_analysis', {}).get('total_columns', 0),
                'Analysis Timestamp': analysis_data.get('complete_analysis', {}).get('timestamp', 'Unknown')
            }
            for stat, value in processing_stats.items():
                st.write(f"**{stat}**: {value}")
        else:
            st.info("📄 Complete analysis data not available. Upload complete_analysis.json to see detailed reports.")
    except Exception:
        st.info("📄 Analysis reports will be available when you upload the JSON files.")

    # Load and display text reports (best-effort: silently skip on failure)
    try:
        # Executive Summary
        summary_url = "https://huggingface.co/spaces/Malaji71/list/resolve/main/executive_summary.txt"
        summary_response = requests.get(summary_url, timeout=30)
        if summary_response.status_code == 200:
            st.markdown("### 📝 Executive Summary")
            st.text(summary_response.text)
    except Exception:
        # BUG FIX: was a bare `except:` — narrowed to Exception.
        pass
    try:
        # Analysis Report excerpt
        report_url = "https://huggingface.co/spaces/Malaji71/list/resolve/main/analysis_report.txt"
        report_response = requests.get(report_url, timeout=30)
        if report_response.status_code == 200:
            st.markdown("### 📋 Full Analysis Report")
            with st.expander("View Complete Report"):
                st.text(report_response.text)
    except Exception:
        pass

    # Download section
    st.markdown("### 📥 Download Analysis Files")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        try:
            # Download complete analysis JSON
            analysis_url = "https://huggingface.co/spaces/Malaji71/list/resolve/main/complete_analysis.json"
            analysis_response = requests.get(analysis_url, timeout=30)
            if analysis_response.status_code == 200:
                st.download_button(
                    label="📊 Analysis JSON",
                    data=analysis_response.content,
                    file_name=f"complete_analysis_{datetime.now().strftime('%Y%m%d')}.json",
                    mime="application/json",
                    help="Complete analysis data in JSON format"
                )
            else:
                st.button("📊 Analysis JSON", disabled=True, help="File not available")
        except Exception:
            st.button("📊 Analysis JSON", disabled=True, help="File not available")
    with col2:
        try:
            # Download analysis report
            report_url = "https://huggingface.co/spaces/Malaji71/list/resolve/main/analysis_report.txt"
            report_response = requests.get(report_url, timeout=30)
            if report_response.status_code == 200:
                st.download_button(
                    label="📋 Full Report",
                    data=report_response.text.encode('utf-8'),
                    file_name=f"analysis_report_{datetime.now().strftime('%Y%m%d')}.txt",
                    mime="text/plain",
                    help="Detailed human-readable report"
                )
            else:
                st.button("📋 Full Report", disabled=True, help="File not available")
        except Exception:
            st.button("📋 Full Report", disabled=True, help="File not available")
    with col3:
        try:
            # Download executive summary
            summary_url = "https://huggingface.co/spaces/Malaji71/list/resolve/main/executive_summary.txt"
            summary_response = requests.get(summary_url, timeout=30)
            if summary_response.status_code == 200:
                st.download_button(
                    label="📝 Executive Summary",
                    data=summary_response.text.encode('utf-8'),
                    file_name=f"executive_summary_{datetime.now().strftime('%Y%m%d')}.txt",
                    mime="text/plain",
                    help="Executive summary"
                )
            else:
                st.button("📝 Executive Summary", disabled=True, help="File not available")
        except Exception:
            st.button("📝 Executive Summary", disabled=True, help="File not available")
    with col4:
        try:
            # Download uncertain entities
            uncertain_url = "https://huggingface.co/spaces/Malaji71/list/resolve/main/uncertain_entities.csv"
            uncertain_response = requests.get(uncertain_url, timeout=30)
            if uncertain_response.status_code == 200:
                st.download_button(
                    label="❓ Uncertain Entities",
                    data=uncertain_response.content,
                    file_name=f"uncertain_entities_{datetime.now().strftime('%Y%m%d')}.csv",
                    mime="text/csv",
                    help="Entities requiring manual review"
                )
            else:
                st.button("❓ Uncertain Entities", disabled=True, help="File not available")
        except Exception:
            st.button("❓ Uncertain Entities", disabled=True, help="File not available")

    # Additional download options
    st.markdown("---")
    st.markdown("### 📦 Bulk Download")
    if st.button("📥 Download All Analysis Files", use_container_width=True):
        try:
            # Create a ZIP file with all analysis files
            import zipfile
            from io import BytesIO
            zip_buffer = BytesIO()
            with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                # Try to add each file
                files_to_add = [
                    ("complete_analysis.json", "application/json"),
                    ("analysis_report.txt", "text/plain"),
                    ("executive_summary.txt", "text/plain"),
                    ("uncertain_entities.csv", "text/csv")
                ]
                for filename, mime_type in files_to_add:
                    try:
                        # BUG FIX: the URL and the placeholder entries below
                        # contained a literal "(unknown)" where {filename}
                        # should be interpolated.
                        file_url = f"https://huggingface.co/spaces/Malaji71/list/resolve/main/{filename}"
                        response = requests.get(file_url, timeout=30)
                        if response.status_code == 200:
                            zip_file.writestr(filename, response.content)
                    except Exception:
                        # Add placeholder if file not available
                        zip_file.writestr(f"{filename}.missing", f"File {filename} not available")
            st.download_button(
                label="📦 Download ZIP Package",
                data=zip_buffer.getvalue(),
                file_name=f"terror_finance_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip",
                mime="application/zip",
                help="Download all analysis files in a single ZIP package"
            )
        except Exception as e:
            st.error(f"Error creating ZIP file: {str(e)}")

    st.markdown("---")
    st.markdown("**📁 Analysis Package Contents:**")
    st.markdown("""
- `complete_analysis.json` - Complete analysis data in JSON format
- `analysis_report.txt` - Detailed human-readable report
- `executive_summary.txt` - Executive summary
- `uncertain_entities.csv` - Entities requiring manual review

**🤖 Generated by Pariente AI - Advanced Intelligence Analytics**
""")

# Footer
st.markdown("---")
col1, col2 = st.columns([3, 1])
with col1:
    st.markdown("🛡️ **Terror Finance & Maritime Watch** - Monitoring entities involved in terror financing and maritime sanctions evasion")
    st.markdown(f"📊 Data processed: {len(data['individuals'])} individuals, {len(data['companies'])} companies, {len(data['vessels'])} vessels")
    st.markdown(f"🕒 Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
with col2:
    st.markdown("**Powered by**")
    st.markdown("🤖 **Pariente AI**")
    st.markdown("*Advanced Intelligence Analytics*")