"""Gradio front end for document fraud analysis with a browsable history.

The app lets a user upload a document, runs it through ``FraudAnalyzer``,
stores the result in ``VectorService`` and exposes history / detail / delete
views on top of that store.
"""

import json
import os
import re
import shutil
import uuid

import gradio as gr
import pandas as pd

from fraud_analyzer import FraudAnalyzer
from vector_service import VectorService

# Initialize
API_KEY = os.environ.get("GOOGLE_API_KEY")
# Without an API key the analyzer is left unset; process_document() refuses
# to run in that case instead of failing on a None analyzer.
analyzer = FraudAnalyzer(API_KEY) if API_KEY else None
vector_db = VectorService()

UPLOAD_DIR = os.path.abspath("./uploads")
STATIC_DIR = os.path.abspath("./static")
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(STATIC_DIR, exist_ok=True)

# Serve static files natively through Gradio
gr.set_static_paths(paths=["static/", "uploads/"])

# Column layout shared by every DataFrame handed to the history table.
HISTORY_COLUMNS = ["ID", "Label", "Amount", "Fraud Score"]

# Patterns used to pull structured fields out of the model's free-text answer.
_FENCED_JSON_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
_LABEL_RE = re.compile(r"\"label\":\s*\"([^\"]+)\"")
_AMOUNT_RE = re.compile(r"\"amount\":\s*\"?([^\",\s]+)\"?")
_SCORE_RE = re.compile(r"\"fraud_score\":\s*\"?(\d+)\"?")


def parse_flash_metrics(analysis_text: str) -> dict[str, str]:
    """Attempt to parse structured fields from Flash's response.

    A fenced ```json block is tried first; when it is absent or is not valid
    JSON, each field is searched for individually with a regex.

    Args:
        analysis_text: Raw text returned by the model.

    Returns:
        A dict with the keys ``label``, ``amount`` and ``fraud_score`` (all
        string values). Fields that cannot be found keep their defaults
        (``"Unknown"``, ``"0"``, ``"0"``).
    """
    metrics = {"label": "Unknown", "amount": "0", "fraud_score": "0"}
    try:
        # Sometimes LLM Services wraps in ```json ... ```
        clean_text = analysis_text
        json_match = _FENCED_JSON_RE.search(analysis_text)
        if json_match:
            try:
                data = json.loads(json_match.group(1))
                metrics.update({k: str(v) for k, v in data.items() if k in metrics})
                return metrics
            except ValueError:
                # json.JSONDecodeError is a ValueError: the fenced block is not
                # valid JSON, so fall back to field-by-field extraction on it.
                clean_text = json_match.group(1)

        # Fallback to regex search for individual fields
        label_match = _LABEL_RE.search(clean_text)
        amount_match = _AMOUNT_RE.search(clean_text)
        score_match = _SCORE_RE.search(clean_text)

        if label_match:
            metrics["label"] = label_match.group(1)
        if amount_match:
            metrics["amount"] = amount_match.group(1)
        if score_match:
            metrics["fraud_score"] = score_match.group(1)
    except Exception as e:
        # Parsing is best-effort: a malformed answer must not abort the analysis.
        print(f"Error parsing metrics: {e}")
    return metrics


def process_document(file_path):
    """
    Analyzes a document for fraud using LLM Services 3 Flash and Nano Banana.
    Extracts structured data, detects duplicates, and generates a fraud score.

    Args:
        file_path (str): The local path to the document file (Image or PDF) to be analyzed.

    Returns:
        A 6-tuple matching the outputs wired to the Analyze button: status
        message, analysis text, metadata as a JSON string, document ID, path
        of the stored copy, and the refreshed history DataFrame. On a
        precondition failure only the message and the history are populated.
    """
    if not API_KEY:
        return "Error: GOOGLE_API_KEY not set.", None, None, None, None, get_history_df()
    if not file_path:
        return "Please upload a document.", None, None, None, None, get_history_df()

    # Copy the upload into UPLOAD_DIR under a unique prefix; the stored record
    # keeps pointing at this copy rather than at the caller's original path.
    filename = os.path.basename(file_path)
    persistent_path = os.path.join(UPLOAD_DIR, f"{str(uuid.uuid4())[:8]}_{filename}")
    shutil.copy(file_path, persistent_path)

    # The duplicate lookup runs before the new document is added to the store.
    dup_result = vector_db.find_duplicates(persistent_path)
    dup_msg = "No duplicates found."
    if dup_result:
        dup_msg = f"⚠️ DUPLICATE DETECTED: {dup_result['type']}"

    result = analyzer.analyze_document(persistent_path)
    metrics = parse_flash_metrics(result['llm_analysis'])

    doc_id = str(uuid.uuid4())[:8]
    score_val = metrics.get('fraud_score', '0')
    formatted_score = f"{score_val}/100"

    # NOTE: `meta` aliases result['metadata'], so the fields added here also
    # appear in the JSON returned for the "Technical Data" view below.
    meta = result['metadata']
    meta['llm_analysis'] = result['llm_analysis']
    meta['filename'] = filename
    meta['label'] = metrics['label']
    meta['amount'] = metrics['amount']
    meta['fraud_score'] = formatted_score
    meta['file_path'] = persistent_path

    # Values are stringified and None entries dropped before storage.
    vector_db.add_document(
        persistent_path,
        doc_id,
        metadata={k: str(v) for k, v in meta.items() if v is not None},
    )

    return (
        f"ID: {doc_id} | {dup_msg}",
        result['llm_analysis'],
        json.dumps(result['metadata'], indent=2),
        doc_id,
        persistent_path,
        get_history_df(),
    )


def get_history_df():
    """
    Retrieves the complete history of analyzed documents from the vector database.
    Returns a list of documents with their IDs, labels, amounts, and fraud scores.

    Returns:
        A pandas DataFrame with the columns ID, Label, Amount and Fraud Score;
        empty (columns only) when the store holds no documents.
    """
    docs = vector_db.collection.get()
    if not docs or not docs['ids']:
        return pd.DataFrame(columns=HISTORY_COLUMNS)

    rows = []
    for doc_id, meta in zip(docs['ids'], docs['metadatas']):
        # A record stored without metadata must not break the whole listing.
        meta = meta or {}
        score = meta.get('fraud_score', '0')
        # Scores saved without the "/100" suffix are normalised for display.
        if "/" not in str(score):
            score = f"{score}/100"
        rows.append([
            doc_id,
            meta.get('label', 'Unknown'),
            meta.get('amount', '0'),
            score,
        ])
    return pd.DataFrame(rows, columns=HISTORY_COLUMNS)


def delete_analysis(doc_id):
    """
    Deletes a specific fraud analysis record using its unique ID.

    Deletion is delegated to ``vector_db.delete_document``.
    NOTE(review): whether the stored upload file is removed as well depends on
    that method, which is not visible here - confirm.

    Args:
        doc_id (str): The unique identifier of the analysis record to be deleted.

    Returns:
        A status message and the refreshed history DataFrame.
    """
    if not doc_id:
        return "Please select an analysis to delete first.", get_history_df()

    vector_db.delete_document(doc_id)
    return f"Successfully deleted ID: {doc_id}", get_history_df()


def on_select_history(evt: gr.SelectData, df):
    """Triggered when a row in the history table is clicked.

    Loads the clicked document, switches to the detail tab (id=2) and returns
    the document ID so it can be kept in ``gr.State`` for a later delete.
    """
    doc_id = df.iloc[evt.index[0]]["ID"]
    msg, analysis, meta_str, file_path = retrieve_document(doc_id)
    # Return values + the ID to store in gr.State
    return msg, analysis, meta_str, file_path, gr.Tabs(selected=2), doc_id


def retrieve_document(doc_id):
    """
    Fetches the detailed analysis results, technical metadata, and the original document for a given ID.

    Args:
        doc_id (str): The unique identifier of the document analysis to retrieve.

    Returns:
        A 4-tuple: status message, analysis text, metadata as a JSON string,
        and the stored file path (None when the file is unavailable).
    """
    if not doc_id:
        return "Enter ID", None, None, None

    doc = vector_db.get_document(doc_id)
    if not doc:
        return f"Not found: {doc_id}", None, None, None

    meta = doc['metadata']
    # Fallback for historical 'gemini_analysis' key
    analysis = meta.get('llm_analysis', meta.get('gemini_analysis', "No analysis."))
    file_path = meta.get('file_path')

    # A record without a stored path is reported like a missing file;
    # os.path.exists(None) would otherwise raise TypeError.
    if not file_path or not os.path.exists(file_path):
        return f"Error: File missing at {file_path}", analysis, "{}", None

    # Hide the bulky analysis text and the server-side path from the metadata view.
    display_meta = {
        k: v
        for k, v in meta.items()
        if k not in ['llm_analysis', 'gemini_analysis', 'file_path']
    }
    return (
        f"Retrieved: {meta.get('filename')}",
        analysis,
        json.dumps(display_meta, indent=2),
        file_path,
    )


# Custom stylesheet handed to demo.launch(). The text is kept exactly as found
# in this file; adjacent literals are concatenated only to keep lines short.
css = (
    " body { background-color: #f0f2f5; font-family: 'Inter', sans-serif; }"
    " .container { max-width: 1000px; margin: auto; padding: 20px; }"
    " .header { text-align: center; margin-bottom: 40px; }"
    " .result-box { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }"
    " .footer-links { text-align: center; padding: 20px; border-top: 1px solid #e2e8f0; margin-top: 40px; }"
    " .footer-links a { margin: 0 15px; text-decoration: none; color: #4f46e5; font-weight: 600; }"
    " .help-card { background: white; padding: 2rem; border-radius: 15px; border-left: 5px solid #4f46e5; margin-bottom: 1rem; }"
    " "
)

with gr.Blocks() as demo:
    gr.Markdown("# 🛡️ Documentary Fraud & History Explorer")

    with gr.Tabs() as main_tabs:
        with gr.TabItem("New Analysis", id=0):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(label="Upload Document")
                    submit_btn = gr.Button("🔍 Analyze", variant="primary")
                with gr.Column(scale=2):
                    dup_output = gr.Textbox(label="Status", interactive=False)
                    preview_input = gr.File(label="Document Preview", interactive=False)
                    # NOTE(review): nesting of these result tabs inside the
                    # right-hand column is assumed - confirm the intended layout.
                    with gr.Tabs():
                        with gr.TabItem("Analysis Result"):
                            analysis_output = gr.Markdown()
                        with gr.TabItem("Technical Data"):
                            meta_output = gr.Code(language="json")

        with gr.TabItem("History Overview", id=1):
            history_table = gr.Dataframe(
                value=get_history_df(),
                headers=["ID", "Label", "Amount", "Fraud Score"],
                interactive=False,
                label="Click a row to view details"
            )
            selected_id_state = gr.State("")  # To store the ID to delete
            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh List")
                delete_btn = gr.Button("🗑️ Delete Selected Analysis", variant="stop")
            delete_status = gr.Textbox(label="Deletion Status", interactive=False)

        with gr.TabItem("Document Detail", id=2):
            with gr.Row():
                search_id = gr.Textbox(label="Document ID")
                search_btn = gr.Button("🔎 View Details")
            detail_msg = gr.Textbox(label="Status", interactive=False)
            with gr.Row():
                with gr.Column(scale=1):
                    detail_preview = gr.File(label="Preview / Download")
                with gr.Column(scale=2):
                    detail_analysis = gr.Markdown()
                    detail_meta = gr.Code(language="json")

        with gr.TabItem("Help & Legal", id=3):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## 🐢 Fraudoo Support & Legal")
                # NOTE(review): the HTML snippets below (and the footer) hold
                # plain text only in this file; any link markup seems to be
                # missing - confirm against the intended pages before shipping.
                with gr.Row():
                    with gr.Column(elem_classes="help-card"):
                        gr.Markdown("### 📧 Support\nNeed assistance? Our support team is ready to help.")
                        gr.HTML('Open Support Page →')
                    with gr.Column(elem_classes="help-card"):
                        gr.Markdown("### ⚖️ Legal\nReview our terms and how we protect your data.")
                        gr.HTML('Privacy Policy')
                        gr.HTML('\nTerms of Service')

    gr.HTML(""" """)

    # Events
    submit_btn.click(
        fn=process_document,
        inputs=[file_input],
        outputs=[dup_output, analysis_output, meta_output, search_id, preview_input, history_table]
    )

    search_btn.click(
        fn=retrieve_document,
        inputs=[search_id],
        outputs=[detail_msg, detail_analysis, detail_meta, detail_preview]
    )

    history_table.select(
        fn=on_select_history,
        inputs=[history_table],
        outputs=[detail_msg, detail_analysis, detail_meta, detail_preview, main_tabs, selected_id_state]
    )

    delete_btn.click(
        fn=delete_analysis,
        inputs=[selected_id_state],
        outputs=[delete_status, history_table]
    )

    refresh_btn.click(fn=get_history_df, outputs=[history_table])

if __name__ == "__main__":
    # Ensure UPLOAD_DIR exists and is used
    demo.launch(
        mcp_server=True,
        theme=gr.themes.Soft(),
        css=css,
        allowed_paths=[STATIC_DIR, UPLOAD_DIR]
    )