File size: 11,129 Bytes
8e025ca
 
 
 
 
 
 
 
 
 
 
 
 
 
71680bc
 
8e025ca
71680bc
 
01649f1
 
8e025ca
 
 
 
 
71680bc
8e025ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d30da2
71680bc
1d30da2
 
 
 
 
8e025ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71680bc
8e025ca
 
 
 
 
 
 
71680bc
8e025ca
 
 
 
 
 
 
 
71680bc
8e025ca
 
1d30da2
 
 
 
8e025ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d30da2
 
 
 
 
 
8e025ca
 
 
 
 
 
 
 
 
 
 
 
 
 
1d30da2
 
 
 
 
 
8e025ca
 
 
 
 
 
 
 
71680bc
 
8e025ca
 
 
 
 
71680bc
8e025ca
 
 
 
 
 
 
71680bc
 
 
8e025ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71680bc
 
 
 
01649f1
 
 
 
 
 
 
 
 
 
 
71680bc
01649f1
 
 
71680bc
 
 
 
8e025ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71680bc
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
import gradio as gr
import os
from fraud_analyzer import FraudAnalyzer
from vector_service import VectorService
import json
import uuid
import pandas as pd
import re
import shutil

# --- Application bootstrap -------------------------------------------------
# The analyzer is only built when a Google API key is configured; otherwise it
# stays None and process_document reports the missing key to the user.
API_KEY = os.getenv("GOOGLE_API_KEY")
analyzer = None
if API_KEY:
    analyzer = FraudAnalyzer(API_KEY)
vector_db = VectorService()

# Absolute folders for persisted uploads and static pages, created at import.
UPLOAD_DIR = os.path.abspath("./uploads")
STATIC_DIR = os.path.abspath("./static")
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(STATIC_DIR, exist_ok=True)

# Register both folders with Gradio as static paths.
gr.set_static_paths(paths=["static/", "uploads/"])

def parse_flash_metrics(analysis_text):
    """Extract ``label``, ``amount`` and ``fraud_score`` from the model's reply.

    The reply is expected to contain a JSON object, optionally wrapped in a
    Markdown code fence. Parsing is best-effort and never raises:

    1. If a fenced block (```json ... ``` or a bare ``` fence) holds a JSON
       object, or the whole reply is a JSON object, it is parsed with
       ``json.loads`` and the known keys are copied over.
    2. Otherwise each field is searched for individually with a regex.

    Args:
        analysis_text: Raw text returned by the model. ``None``, non-string
            and blank values yield the defaults.

    Returns:
        dict: Always has the keys ``label``, ``amount`` and ``fraud_score``,
        all with string values. Defaults are ``"Unknown"``, ``"0"`` and ``"0"``.
    """
    metrics = {"label": "Unknown", "amount": "0", "fraud_score": "0"}
    if not isinstance(analysis_text, str) or not analysis_text.strip():
        return metrics

    fenced = re.search(
        r"```(?:json)?\s*(\{.*?\})\s*```",
        analysis_text,
        re.DOTALL | re.IGNORECASE,
    )
    candidate = fenced.group(1) if fenced else analysis_text.strip()

    if candidate.startswith("{"):
        try:
            data = json.loads(candidate)
        except (ValueError, RecursionError):
            # Malformed JSON: fall through to the per-field regex search.
            data = None
        if isinstance(data, dict):
            # JSON null would otherwise become the string "None"; keep the default.
            metrics.update(
                {k: str(v) for k, v in data.items() if k in metrics and v is not None}
            )
            return metrics

    # Fallback: look for each field on its own.
    label_match = re.search(r'"label":\s*"([^"]+)"', candidate)
    # Prefer a fully quoted amount so values like "1,250.00 EUR" are not cut at
    # the first comma/space; the unquoted form stops before ',', whitespace or '}'.
    amount_match = (
        re.search(r'"amount":\s*"([^"]+)"', candidate)
        or re.search(r'"amount":\s*([^",\s}]+)', candidate)
    )
    # Accept decimals so the result matches what the JSON path would produce.
    score_match = re.search(r'"fraud_score":\s*"?(\d+(?:\.\d+)?)"?', candidate)

    if label_match:
        metrics["label"] = label_match.group(1)
    if amount_match:
        metrics["amount"] = amount_match.group(1)
    if score_match:
        metrics["fraud_score"] = score_match.group(1)
    return metrics

def process_document(file_path):
    """
    Analyzes a document for fraud using LLM Services 3 Flash and Nano Banana.
    Extracts structured data, detects duplicates, and generates a fraud score.

    The upload is copied into UPLOAD_DIR under a random 8-character prefix,
    checked for duplicates through ``vector_db.find_duplicates``, analyzed with
    ``analyzer.analyze_document`` and finally stored via
    ``vector_db.add_document`` together with its metadata.

    Args:
        file_path (str): The local path to the document file (Image or PDF) to be analyzed.

    Returns:
        tuple: ``(status, analysis, metadata_json, doc_id, stored_path, history_df)``.
        When the API key or the file is missing, only the status message and
        the history table are populated; the other entries are ``None``.
    """
    if not API_KEY:
        return "Error: GOOGLE_API_KEY not set.", None, None, None, None, get_history_df()
    if not file_path:
        return "Please upload a document.", None, None, None, None, get_history_df()

    filename = os.path.basename(file_path)
    persistent_path = os.path.join(UPLOAD_DIR, f"{str(uuid.uuid4())[:8]}_{filename}")
    shutil.copy(file_path, persistent_path)

    dup_result = vector_db.find_duplicates(persistent_path)
    dup_msg = "No duplicates found."
    if dup_result:
        # The warning sign was stored as mis-decoded UTF-8; restored here.
        dup_msg = f"⚠️ DUPLICATE DETECTED: {dup_result['type']}"

    result = analyzer.analyze_document(persistent_path)
    metrics = parse_flash_metrics(result['llm_analysis'])

    doc_id = str(uuid.uuid4())[:8]
    formatted_score = f"{metrics.get('fraud_score', '0')}/100"

    # Work on a copy so the analyzer's own metadata dict is not mutated.
    stored_meta = dict(result['metadata'])
    stored_meta.update({
        'llm_analysis': result['llm_analysis'],
        'filename': filename,
        'label': metrics['label'],
        'amount': metrics['amount'],
        'fraud_score': formatted_score,
        'file_path': persistent_path,
    })

    # Values are stringified and None entries dropped before storage.
    vector_db.add_document(
        persistent_path,
        doc_id,
        metadata={k: str(v) for k, v in stored_meta.items() if v is not None},
    )

    # Show the same fields retrieve_document shows: the analysis text already
    # has its own panel, and the storage path is an internal detail.
    hidden_keys = ('llm_analysis', 'gemini_analysis', 'file_path')
    display_meta = {k: v for k, v in stored_meta.items() if k not in hidden_keys}

    return (
        f"ID: {doc_id} | {dup_msg}",
        result['llm_analysis'],
        json.dumps(display_meta, indent=2),
        doc_id,
        persistent_path,
        get_history_df(),
    )

def get_history_df():
    """
    Retrieves the complete history of analyzed documents from the vector database.

    Returns:
        pandas.DataFrame: One row per stored document with the columns
        ``ID``, ``Label``, ``Amount`` and ``Fraud Score``. The frame is empty
        (columns only) when nothing is stored. Scores without a ``/`` get a
        ``/100`` suffix so older records display like new ones.
    """
    columns = ["ID", "Label", "Amount", "Fraud Score"]
    docs = vector_db.collection.get()
    if not docs or not docs['ids']:
        return pd.DataFrame(columns=columns)

    rows = []
    for doc_id, meta in zip(docs['ids'], docs['metadatas']):
        # A record stored without metadata may come back as None; treat it as empty.
        meta = meta or {}
        score = meta.get('fraud_score', '0')
        if "/" not in str(score):
            score = f"{score}/100"
        rows.append([
            doc_id,
            meta.get('label', 'Unknown'),
            meta.get('amount', '0'),
            score,
        ])
    return pd.DataFrame(rows, columns=columns)

def delete_analysis(doc_id):
    """
    Removes one analysis record, identified by its ID, from the vector database.

    Removal is delegated to ``vector_db.delete_document``; whether the stored
    file on disk is removed as well depends on that method (not visible here).

    Args:
        doc_id (str): The unique identifier of the analysis record to be deleted.

    Returns:
        tuple: ``(status_message, history_df)`` with the refreshed history table.
    """
    if doc_id:
        vector_db.delete_document(doc_id)
        status = f"Successfully deleted ID: {doc_id}"
    else:
        # Nothing selected yet: leave the store untouched.
        status = "Please select an analysis to delete first."
    return status, get_history_df()

def on_select_history(evt: gr.SelectData, df):
    """Handle a click on a history row: load its details and open the detail tab.

    Returns the four values from ``retrieve_document``, a ``gr.Tabs`` update
    selecting tab id 2, and the clicked document ID (kept in ``gr.State`` so
    the delete button knows what to remove).
    """
    clicked_row = evt.index[0]
    doc_id = df.iloc[clicked_row]["ID"]
    return (*retrieve_document(doc_id), gr.Tabs(selected=2), doc_id)

def retrieve_document(doc_id):
    """
    Fetches the detailed analysis results, technical metadata, and the original document for a given ID.

    Args:
        doc_id (str): The unique identifier of the document analysis to retrieve.

    Returns:
        tuple: ``(status_message, analysis_text, metadata_json, file_path)``.
        For an empty or unknown ID only the status message is set. If the
        record has no stored path, or the file no longer exists, the analysis
        is still returned, with ``"{}"`` as metadata and ``None`` as the path.
    """
    if not doc_id:
        return "Enter ID", None, None, None

    doc = vector_db.get_document(doc_id)
    if not doc:
        return f"Not found: {doc_id}", None, None, None

    meta = doc['metadata']
    # Fallback for historical 'gemini_analysis' key
    analysis = meta.get('llm_analysis', meta.get('gemini_analysis', "No analysis."))
    file_path = meta.get('file_path')

    # os.path.exists(None) raises TypeError, so check for a missing path first.
    if not file_path or not os.path.exists(file_path):
        return f"Error: File missing at {file_path}", analysis, "{}", None

    # The analysis text has its own panel and the path is internal; hide both.
    hidden_keys = ('llm_analysis', 'gemini_analysis', 'file_path')
    display_meta = {k: v for k, v in meta.items() if k not in hidden_keys}
    return f"Retrieved: {meta.get('filename')}", analysis, json.dumps(display_meta, indent=2), file_path

# Custom stylesheet for the UI; it is handed to demo.launch(css=css) in the
# __main__ block. The "container", "help-card" and "footer-links" classes are
# referenced by the layout in this file.
css = """
body { background-color: #f0f2f5; font-family: 'Inter', sans-serif; }
.container { max-width: 1000px; margin: auto; padding: 20px; }
.header { text-align: center; margin-bottom: 40px; }
.result-box { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.footer-links { text-align: center; padding: 20px; border-top: 1px solid #e2e8f0; margin-top: 40px; }
.footer-links a { margin: 0 15px; text-decoration: none; color: #4f46e5; font-weight: 600; }
.help-card { background: white; padding: 2rem; border-radius: 15px; border-left: 5px solid #4f46e5; margin-bottom: 1rem; }
"""

# UI definition. Several labels below had been stored as mis-decoded UTF-8
# (e.g. a warning or magnifier emoji rendered as Greek/Latin fragments); they
# are restored here. NOTE(review): the title emoji and the mascot emoji were
# reconstructed (the mascot from the correctly encoded footer) - please confirm.
with gr.Blocks() as demo:
    gr.Markdown("# 🛡️ Documentary Fraud & History Explorer")
    
    with gr.Tabs() as main_tabs:
        # Tab 0: upload a document and see the fresh analysis.
        with gr.TabItem("New Analysis", id=0):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(label="Upload Document")
                    submit_btn = gr.Button("🔍 Analyze", variant="primary")
                with gr.Column(scale=2):
                    dup_output = gr.Textbox(label="Status", interactive=False)
                    preview_input = gr.File(label="Document Preview", interactive=False)
            
            with gr.Tabs():
                with gr.TabItem("Analysis Result"):
                    analysis_output = gr.Markdown()
                with gr.TabItem("Technical Data"):
                    meta_output = gr.Code(language="json")

        # Tab 1: table of everything stored so far, with refresh/delete actions.
        with gr.TabItem("History Overview", id=1):
            history_table = gr.Dataframe(
                value=get_history_df(),
                headers=["ID", "Label", "Amount", "Fraud Score"],
                interactive=False,
                label="Click a row to view details"
            )
            selected_id_state = gr.State("") # To store the ID to delete
            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh List")
                delete_btn = gr.Button("🗑️ Delete Selected Analysis", variant="stop")
            delete_status = gr.Textbox(label="Deletion Status", interactive=False)

        # Tab 2: details for a single document, by ID or via a history click.
        with gr.TabItem("Document Detail", id=2):
            with gr.Row():
                search_id = gr.Textbox(label="Document ID")
                search_btn = gr.Button("🔎 View Details")
            
            detail_msg = gr.Textbox(label="Status", interactive=False)
            with gr.Row():
                with gr.Column(scale=1):
                    detail_preview = gr.File(label="Preview / Download")
                with gr.Column(scale=2):
                    detail_analysis = gr.Markdown()
                    detail_meta = gr.Code(language="json")

        # Tab 3: links to the static support and legal pages.
        with gr.TabItem("Help & Legal", id=3):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## 🐒 Fraudoo Support & Legal")
                
                with gr.Row():
                    with gr.Column(elem_classes="help-card"):
                        gr.Markdown("### 📧 Support\nNeed assistance? Our support team is ready to help.")
                        gr.HTML('<a href="/static/support.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Open Support Page →</a>')
                    
                    with gr.Column(elem_classes="help-card"):
                        gr.Markdown("### ⚖️ Legal\nReview our terms and how we protect your data.")
                        gr.HTML('<a href="/static/privacy.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Privacy Policy</a>')
                        gr.HTML('<br><a href="/static/terms.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Terms of Service</a>')

    gr.HTML("""
    <div class="footer-links">
        <a href="/static/support.html" target="_blank">Support</a>
        <a href="/static/privacy.html" target="_blank">Privacy</a>
        <a href="/static/terms.html" target="_blank">Terms</a>
        <span style="color: #64748b; margin-left: 20px;">© 2026 Fraudoo 🐒</span>
    </div>
    """)

    # Events
    # Analyze: the new document ID is also written into the detail tab's ID box.
    submit_btn.click(
        fn=process_document,
        inputs=[file_input],
        outputs=[dup_output, analysis_output, meta_output, search_id, preview_input, history_table]
    )
    
    search_btn.click(
        fn=retrieve_document,
        inputs=[search_id],
        outputs=[detail_msg, detail_analysis, detail_meta, detail_preview]
    )
    
    # Row click: fill the detail tab, switch to it, and remember the ID for deletion.
    history_table.select(
        fn=on_select_history,
        inputs=[history_table],
        outputs=[detail_msg, detail_analysis, detail_meta, detail_preview, main_tabs, selected_id_state]
    )

    delete_btn.click(
        fn=delete_analysis,
        inputs=[selected_id_state],
        outputs=[delete_status, history_table]
    )

    refresh_btn.click(fn=get_history_df, outputs=[history_table])

if __name__ == "__main__":
    # Theme and stylesheet are supplied at launch time; the static and upload
    # folders are passed as allowed_paths alongside the MCP server flag.
    launch_options = {
        "mcp_server": True,
        "theme": gr.themes.Soft(),
        "css": css,
        "allowed_paths": [STATIC_DIR, UPLOAD_DIR],
    }
    demo.launch(**launch_options)