Spaces:

obaes
/

fraudoo

Sleeping

File size: 11,129 Bytes

import gradio as gr
import os
from fraud_analyzer import FraudAnalyzer
from vector_service import VectorService
import json
import uuid
import pandas as pd
import re
import shutil

# Initialize
# Module-level setup executed at import time: read the API key, create the
# analyzer / vector-store objects shared by every handler below, and make sure
# the working directories exist before the UI is built.
API_KEY = os.environ.get("GOOGLE_API_KEY")
# Without a key the analyzer stays None; process_document checks API_KEY
# before touching it.
analyzer = FraudAnalyzer(API_KEY) if API_KEY else None
vector_db = VectorService()
# Both directories are resolved against the current working directory.
UPLOAD_DIR = os.path.abspath("./uploads")
STATIC_DIR = os.path.abspath("./static")
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(STATIC_DIR, exist_ok=True)

# Serve static files natively through Gradio
# NOTE(review): these paths are relative while UPLOAD_DIR / STATIC_DIR are
# absolute; they presumably resolve to the same folders when the app is started
# from the project root — confirm. Registering "uploads/" also appears to make
# every uploaded document fetchable by URL — confirm this is intended.
gr.set_static_paths(paths=["static/", "uploads/"])

def parse_flash_metrics(analysis_text):
    """Attempt to parse structured fields from Flash's response.

    The parser first looks for a JSON object inside a ```json fenced block and,
    when it decodes cleanly, copies the ``label``, ``amount`` and
    ``fraud_score`` keys from it. If there is no fence, or the fenced payload is
    not valid JSON, each field is searched for individually with a regex.

    Args:
        analysis_text (str): Raw text returned by the model.

    Returns:
        dict: Always contains the keys ``label``, ``amount`` and
        ``fraud_score`` with string values. Fields that cannot be found keep
        their defaults (``"Unknown"``, ``"0"``, ``"0"``).
    """
    metrics = {"label": "Unknown", "amount": "0", "fraud_score": "0"}
    # Anything that is not text (e.g. None from a failed call) yields defaults.
    if not isinstance(analysis_text, str):
        return metrics

    clean_text = analysis_text
    # Sometimes the LLM wraps its answer in ```json ... ```
    json_match = re.search(r"```json\s*(\{.*?\})\s*```", analysis_text, re.DOTALL)
    if json_match:
        # Even if decoding fails, restrict the regex fallback to the fenced
        # payload so surrounding prose cannot produce false matches.
        clean_text = json_match.group(1)
        try:
            data = json.loads(clean_text)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; fall through to the regexes.
            data = None
        if isinstance(data, dict):
            metrics.update({k: str(v) for k, v in data.items() if k in metrics})
            return metrics

    # Fallback to regex search for individual fields
    label_match = re.search(r"\"label\":\s*\"([^\"]+)\"", clean_text)
    # A properly quoted amount is taken whole (it may contain commas or spaces,
    # e.g. "1,234.56"). An unquoted or half-quoted value stops at the first
    # delimiter, including a closing brace/bracket so `"amount": 1200}` does not
    # yield "1200}".
    amount_match = re.search(
        r"\"amount\":\s*(?:\"([^\"\n]+)\"|\"?([^\",\s}\]]+))",
        clean_text,
    )
    score_match = re.search(r"\"fraud_score\":\s*\"?(\d+)\"?", clean_text)

    if label_match:
        metrics["label"] = label_match.group(1)
    if amount_match:
        metrics["amount"] = amount_match.group(1) or amount_match.group(2)
    if score_match:
        metrics["fraud_score"] = score_match.group(1)
    return metrics

def process_document(file_path):
    """
    Analyzes a document for fraud, checks it against previously stored documents,
    and records the result in the vector database.

    The upload is first copied into UPLOAD_DIR under a unique name so the stored
    record keeps pointing at a file this app controls.

    Args:
        file_path (str): The local path to the document file (Image or PDF) to be analyzed.

    Returns:
        tuple: (status message, analysis text, metadata as a JSON string,
        document ID, stored file path, history DataFrame). When the API key or
        the file is missing, only the status message and the history are set;
        the four middle values are None.
    """
    if not API_KEY:
        return "Error: GOOGLE_API_KEY not set.", None, None, None, None, get_history_df()
    if not file_path:
        return "Please upload a document.", None, None, None, None, get_history_df()

    filename = os.path.basename(file_path)
    # A short random prefix keeps same-named uploads from overwriting each other.
    persistent_path = os.path.join(UPLOAD_DIR, f"{str(uuid.uuid4())[:8]}_{filename}")
    shutil.copy(file_path, persistent_path)

    # Look for duplicates before this document itself is added to the store.
    dup_result = vector_db.find_duplicates(persistent_path)
    dup_msg = "No duplicates found."
    if dup_result:
        dup_msg = f"⚠️ DUPLICATE DETECTED: {dup_result['type']}"

    result = analyzer.analyze_document(persistent_path)
    llm_analysis = result['llm_analysis']
    metrics = parse_flash_metrics(llm_analysis)

    doc_id = str(uuid.uuid4())[:8]

    # The model may already answer "85/100"; only append the scale when it is
    # missing (same guard get_history_df applies), avoiding "85/100/100".
    score_val = str(metrics.get('fraud_score', '0'))
    formatted_score = score_val if "/" in score_val else f"{score_val}/100"

    # Work on a copy so the analyzer's own result dict is not mutated.
    meta = dict(result['metadata'])
    meta['llm_analysis'] = llm_analysis
    meta['filename'] = filename
    meta['label'] = metrics['label']
    meta['amount'] = metrics['amount']
    meta['fraud_score'] = formatted_score
    meta['file_path'] = persistent_path

    # Values are stringified and None entries dropped before being stored.
    stored_meta = {k: str(v) for k, v in meta.items() if v is not None}
    vector_db.add_document(persistent_path, doc_id, metadata=stored_meta)

    return (
        f"ID: {doc_id} | {dup_msg}",
        llm_analysis,
        json.dumps(meta, indent=2),
        doc_id,
        persistent_path,
        get_history_df(),
    )

def get_history_df():
    """
    Retrieves the complete history of analyzed documents from the vector database.
    Returns a list of documents with their IDs, labels, amounts, and fraud scores.

    Returns:
        pandas.DataFrame: One row per stored document with the columns
        "ID", "Label", "Amount" and "Fraud Score"; empty (columns only) when
        the collection has no records.
    """
    columns = ["ID", "Label", "Amount", "Fraud Score"]

    docs = vector_db.collection.get()
    if not docs or not docs['ids']:
        return pd.DataFrame(columns=columns)

    metadatas = docs['metadatas']
    rows = []
    for i, doc_id in enumerate(docs['ids']):
        # A record stored without metadata may come back as None; show it with
        # default values instead of failing the whole table.
        meta = metadatas[i] or {}
        score = meta.get('fraud_score', '0')
        # Older records may hold a bare number; normalise to "<n>/100".
        if "/" not in str(score):
            score = f"{score}/100"

        rows.append([
            doc_id,
            meta.get('label', 'Unknown'),
            meta.get('amount', '0'),
            score,
        ])
    return pd.DataFrame(rows, columns=columns)

def delete_analysis(doc_id):
    """
    Removes a fraud analysis record from the vector database by its unique ID
    and returns the refreshed history.

    Args:
        doc_id (str): The unique identifier of the analysis record to be deleted.

    Returns:
        tuple: (status message, updated history DataFrame).
    """
    if doc_id:
        # Any cleanup beyond the database record is up to delete_document.
        vector_db.delete_document(doc_id)
        status = f"Successfully deleted ID: {doc_id}"
    else:
        status = "Please select an analysis to delete first."

    # The history is read after the deletion so the table reflects it.
    return status, get_history_df()

def on_select_history(evt: gr.SelectData, df):
    """Handle a click on a history row: load that document and open its detail tab.

    Args:
        evt (gr.SelectData): Selection event; ``evt.index[0]`` is used as the row position.
        df: The history table's current DataFrame value.

    Returns:
        tuple: The four values from retrieve_document, followed by a Tabs
        update selecting tab id 2 and the clicked document's ID.
    """
    row_position = evt.index[0]
    selected_id = df.iloc[row_position]["ID"]

    status, analysis_text, meta_json, stored_path = retrieve_document(selected_id)

    # The trailing ID is kept in gr.State so the delete button knows its target.
    return (
        status,
        analysis_text,
        meta_json,
        stored_path,
        gr.Tabs(selected=2),
        selected_id,
    )

def retrieve_document(doc_id):
    """
    Fetches the detailed analysis results, technical metadata, and the original document for a given ID.

    Args:
        doc_id (str): The unique identifier of the document analysis to retrieve.

    Returns:
        tuple: (status message, analysis text, metadata as a JSON string,
        stored file path). Values after the status message are None when the
        ID is empty or unknown. When the stored file cannot be located, the
        analysis is still returned, with "{}" as metadata and None as the path.
    """
    if not doc_id:
        return "Enter ID", None, None, None

    doc = vector_db.get_document(doc_id)
    if not doc:
        return f"Not found: {doc_id}", None, None, None

    meta = doc['metadata']
    # Fallback for historical 'gemini_analysis' key
    analysis = meta.get('llm_analysis', meta.get('gemini_analysis', "No analysis."))
    file_path = meta.get('file_path')

    # Records without a 'file_path' entry would make os.path.exists(None)
    # raise TypeError, so report that case explicitly.
    if not file_path:
        return f"Error: No stored file path for {doc_id}", analysis, "{}", None
    if not os.path.exists(file_path):
        return f"Error: File missing at {file_path}", analysis, "{}", None

    # The analysis text is shown in its own pane and the path is internal, so
    # neither is repeated in the metadata view.
    hidden_keys = ('llm_analysis', 'gemini_analysis', 'file_path')
    display_meta = {k: v for k, v in meta.items() if k not in hidden_keys}
    return f"Retrieved: {meta.get('filename')}", analysis, json.dumps(display_meta, indent=2), file_path

# Custom stylesheet handed to demo.launch(css=...) at the bottom of the file.
# The "container", "help-card" and "footer-links" classes are referenced by the
# layout below; ".header" and ".result-box" are not used in the visible layout.
css = """
body { background-color: #f0f2f5; font-family: 'Inter', sans-serif; }
.container { max-width: 1000px; margin: auto; padding: 20px; }
.header { text-align: center; margin-bottom: 40px; }
.result-box { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.footer-links { text-align: center; padding: 20px; border-top: 1px solid #e2e8f0; margin-top: 40px; }
.footer-links a { margin: 0 15px; text-decoration: none; color: #4f46e5; font-weight: 600; }
.help-card { background: white; padding: 2rem; border-radius: 15px; border-left: 5px solid #4f46e5; margin-bottom: 1rem; }
"""

# UI definition: four tabs (new analysis, history, document detail, help/legal),
# a footer, and the event wiring that connects the widgets to the handlers above.
with gr.Blocks() as demo:
    gr.Markdown("# 🛡️ Documentary Fraud & History Explorer")
    
    # Tab ids are explicit so on_select_history can switch tabs with
    # gr.Tabs(selected=2).
    with gr.Tabs() as main_tabs:
        # --- Tab 0: upload a document and show the analysis result ---
        with gr.TabItem("New Analysis", id=0):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(label="Upload Document")
                    submit_btn = gr.Button("🔍 Analyze", variant="primary")
                with gr.Column(scale=2):
                    dup_output = gr.Textbox(label="Status", interactive=False)
                    preview_input = gr.File(label="Document Preview", interactive=False)
            
            with gr.Tabs():
                with gr.TabItem("Analysis Result"):
                    analysis_output = gr.Markdown()
                with gr.TabItem("Technical Data"):
                    meta_output = gr.Code(language="json")

        # --- Tab 1: table of all stored analyses, with refresh and delete ---
        with gr.TabItem("History Overview", id=1):
            # get_history_df() is called here, so the initial table content is
            # computed once while the UI is being built.
            history_table = gr.Dataframe(
                value=get_history_df(),
                headers=["ID", "Label", "Amount", "Fraud Score"],
                interactive=False,
                label="Click a row to view details"
            )
            selected_id_state = gr.State("") # To store the ID to delete
            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh List")
                delete_btn = gr.Button("🗑️ Delete Selected Analysis", variant="stop")
            delete_status = gr.Textbox(label="Deletion Status", interactive=False)

        # --- Tab 2: look up one analysis by ID ---
        with gr.TabItem("Document Detail", id=2):
            with gr.Row():
                search_id = gr.Textbox(label="Document ID")
                search_btn = gr.Button("🔎 View Details")
            
            detail_msg = gr.Textbox(label="Status", interactive=False)
            with gr.Row():
                with gr.Column(scale=1):
                    detail_preview = gr.File(label="Preview / Download")
                with gr.Column(scale=2):
                    detail_analysis = gr.Markdown()
                    detail_meta = gr.Code(language="json")

        # --- Tab 3: links to the static support / legal pages ---
        with gr.TabItem("Help & Legal", id=3):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## 🐢 Fraudoo Support & Legal")
                
                with gr.Row():
                    with gr.Column(elem_classes="help-card"):
                        gr.Markdown("### 📧 Support\nNeed assistance? Our support team is ready to help.")
                        gr.HTML('<a href="/static/support.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Open Support Page →</a>')
                    
                    with gr.Column(elem_classes="help-card"):
                        gr.Markdown("### ⚖️ Legal\nReview our terms and how we protect your data.")
                        gr.HTML('<a href="/static/privacy.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Privacy Policy</a>')
                        gr.HTML('<br><a href="/static/terms.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Terms of Service</a>')

    # Footer shown below the tabs, repeating the static-page links.
    gr.HTML("""
    <div class="footer-links">
        <a href="/static/support.html" target="_blank">Support</a>
        <a href="/static/privacy.html" target="_blank">Privacy</a>
        <a href="/static/terms.html" target="_blank">Terms</a>
        <span style="color: #64748b; margin-left: 20px;">© 2026 Fraudoo 🐢</span>
    </div>
    """)

    # Events
    # Analyze: the six values returned by process_document map positionally onto
    # these outputs; the new document ID lands in the detail tab's search box and
    # the history table is refreshed.
    submit_btn.click(
        fn=process_document,
        inputs=[file_input],
        outputs=[dup_output, analysis_output, meta_output, search_id, preview_input, history_table]
    )
    
    # Manual lookup by ID on the detail tab.
    search_btn.click(
        fn=retrieve_document,
        inputs=[search_id],
        outputs=[detail_msg, detail_analysis, detail_meta, detail_preview]
    )
    
    # Clicking a history row fills the detail tab, switches to it, and remembers
    # the clicked ID in selected_id_state for the delete button.
    history_table.select(
        fn=on_select_history,
        inputs=[history_table],
        outputs=[detail_msg, detail_analysis, detail_meta, detail_preview, main_tabs, selected_id_state]
    )

    # Delete acts on the last row that was clicked (selected_id_state).
    delete_btn.click(
        fn=delete_analysis,
        inputs=[selected_id_state],
        outputs=[delete_status, history_table]
    )

    refresh_btn.click(fn=get_history_df, outputs=[history_table])

if __name__ == "__main__":
    # Start the app with the MCP server enabled, the Soft theme and the custom
    # stylesheet. allowed_paths lists the static and upload directories
    # (presumably so Gradio may serve files stored there — see the launch() docs).
    demo.launch(
        mcp_server=True, 
        theme=gr.themes.Soft(), 
        css=css,
        allowed_paths=[STATIC_DIR, UPLOAD_DIR]
    )