# fraudoo / app.py
# Uploaded by obaes: "Upload 10 files" (commit 01649f1, verified)
import gradio as gr
import os
from fraud_analyzer import FraudAnalyzer
from vector_service import VectorService
import json
import uuid
import pandas as pd
import re
import shutil
# --- Application-wide setup ---------------------------------------------------
# The analyzer is only built when an API key is configured; otherwise it stays
# None and process_document() reports the missing key instead of analysing.
API_KEY = os.environ.get("GOOGLE_API_KEY")
analyzer = None if not API_KEY else FraudAnalyzer(API_KEY)
vector_db = VectorService()

# Working directories, resolved to absolute paths from the current directory.
UPLOAD_DIR = os.path.abspath("./uploads")
STATIC_DIR = os.path.abspath("./static")
for _required_dir in (UPLOAD_DIR, STATIC_DIR):
    os.makedirs(_required_dir, exist_ok=True)

# Register both folders as Gradio static paths so their files are served as-is.
gr.set_static_paths(paths=["static/", "uploads/"])
def parse_flash_metrics(analysis_text):
    """Extract ``label``, ``amount`` and ``fraud_score`` from the model response.

    Parsing is best-effort and never raises:

    1. If the text contains a ```json fenced object that is valid JSON, the
       three known keys are read from it (values converted with ``str``).
    2. Otherwise each field is searched for individually with a regular
       expression, inside the fenced snippet if there was one, else in the
       whole text.

    Args:
        analysis_text: Raw text returned by the analysis model.

    Returns:
        dict: Always has the keys ``label``, ``amount`` and ``fraud_score``,
        all string-valued. Fields that cannot be found keep their defaults
        (``"Unknown"``, ``"0"``, ``"0"``).
    """
    metrics = {"label": "Unknown", "amount": "0", "fraud_score": "0"}
    try:
        clean_text = analysis_text
        # The model sometimes wraps its JSON answer in a ```json ... ``` fence.
        json_match = re.search(r"```json\s*(\{.*?\})\s*```", analysis_text, re.DOTALL)
        if json_match:
            clean_text = json_match.group(1)
            try:
                data = json.loads(clean_text)
            except ValueError:
                # Almost-JSON (trailing comma, single quotes, ...): salvage the
                # individual fields from the fenced snippet with the regexes below.
                pass
            else:
                metrics.update({k: str(v) for k, v in data.items() if k in metrics})
                return metrics

        # Fallback: look for each field on its own.
        label_match = re.search(r'"label":\s*"([^"]+)"', clean_text)
        # A quoted amount is taken verbatim up to the closing quote, so values
        # such as "1,234.56" or "1 200 EUR" are not cut at the first comma or
        # space. An unquoted amount stops at a delimiter, including the
        # closing brace/bracket of the enclosing structure.
        amount_match = re.search(r'"amount":\s*(?:"([^"]*)"|([^",\s}\]]+))', clean_text)
        # Accept decimals so the result matches what the JSON path would give.
        score_match = re.search(r'"fraud_score":\s*"?(\d+(?:\.\d+)?)"?', clean_text)

        if label_match:
            metrics["label"] = label_match.group(1)
        if amount_match:
            amount = (amount_match.group(1) or amount_match.group(2) or "").strip()
            if amount:  # an empty quoted value keeps the default
                metrics["amount"] = amount
        if score_match:
            metrics["fraud_score"] = score_match.group(1)
    except Exception as e:
        # Deliberate best-effort boundary (e.g. analysis_text is not a string):
        # report the problem and fall back to whatever was collected so far.
        print(f"Error parsing metrics: {e}")
    return metrics
def process_document(file_path):
    """
    Analyzes a document for fraud using LLM Services 3 Flash and Nano Banana.
    Extracts structured data, detects duplicates, and generates a fraud score.

    Args:
        file_path (str): The local path to the document file (Image or PDF) to be analyzed.

    Returns:
        tuple: (status message, analysis text, technical metadata as a JSON
        string, document ID, path of the stored copy, refreshed history table).
        When the API key or the file is missing, only the status message and
        the history table are filled in; the other entries are None.
    """
    if not API_KEY:
        return "Error: GOOGLE_API_KEY not set.", None, None, None, None, get_history_df()
    if not file_path:
        return "Please upload a document.", None, None, None, None, get_history_df()

    # Keep our own copy under UPLOAD_DIR; the short random prefix avoids
    # clashes between uploads that share a file name.
    filename = os.path.basename(file_path)
    persistent_path = os.path.join(UPLOAD_DIR, f"{str(uuid.uuid4())[:8]}_{filename}")
    shutil.copy(file_path, persistent_path)

    # The duplicate check runs before the new document is indexed below.
    dup_result = vector_db.find_duplicates(persistent_path)
    dup_msg = "No duplicates found."
    if dup_result:
        dup_msg = f"⚠️ DUPLICATE DETECTED: {dup_result['type']}"

    result = analyzer.analyze_document(persistent_path)
    llm_analysis = result['llm_analysis']
    metrics = parse_flash_metrics(llm_analysis)
    doc_id = str(uuid.uuid4())[:8]
    score_val = metrics.get('fraud_score', '0')
    formatted_score = f"{score_val}/100"

    # Work on a copy: the stored record needs the analysis text and the file
    # path, but adding them to result['metadata'] itself would leak both into
    # the "Technical Data" panel.
    meta = dict(result['metadata'])
    meta['llm_analysis'] = llm_analysis
    meta['filename'] = filename
    meta['label'] = metrics['label']
    meta['amount'] = metrics['amount']
    meta['fraud_score'] = formatted_score
    meta['file_path'] = persistent_path
    vector_db.add_document(
        persistent_path,
        doc_id,
        metadata={k: str(v) for k, v in meta.items() if v is not None},
    )

    # Show the same fields retrieve_document() shows for a stored record.
    hidden_keys = ('llm_analysis', 'gemini_analysis', 'file_path')
    display_meta = {k: v for k, v in meta.items() if k not in hidden_keys}
    return (
        f"ID: {doc_id} | {dup_msg}",
        llm_analysis,
        json.dumps(display_meta, indent=2),
        doc_id,
        persistent_path,
        get_history_df(),
    )
def get_history_df():
    """
    Retrieves the complete history of analyzed documents from the vector database.
    Returns a table with one row per document: its ID, label, amount, and fraud score.
    """
    columns = ["ID", "Label", "Amount", "Fraud Score"]
    docs = vector_db.collection.get()
    if not (docs and docs['ids']):
        # Nothing stored yet: hand back an empty table with the right headers.
        return pd.DataFrame(columns=columns)

    rows = []
    for position, doc_id in enumerate(docs['ids']):
        meta = docs['metadatas'][position]
        score = meta.get('fraud_score', '0')
        # Scores stored without the "/100" suffix get it added for display.
        score_text = score if "/" in str(score) else f"{score}/100"
        rows.append([
            doc_id,
            meta.get('label', 'Unknown'),
            meta.get('amount', '0'),
            score_text,
        ])
    return pd.DataFrame(rows, columns=columns)
def delete_analysis(doc_id):
    """
    Deletes the fraud analysis record with the given ID from the vector database.
    Any clean-up of stored files is left to vector_db.delete_document (not visible here).

    Args:
        doc_id (str): The unique identifier of the analysis record to be deleted.

    Returns:
        tuple: (status message, refreshed history table).
    """
    if doc_id:
        vector_db.delete_document(doc_id)
        status = f"Successfully deleted ID: {doc_id}"
    else:
        # Nothing has been selected in the history table yet.
        status = "Please select an analysis to delete first."
    return status, get_history_df()
def on_select_history(evt: gr.SelectData, df):
    """Handle a click on a history row: load that document and open the detail tab."""
    row_index = evt.index[0]
    doc_id = df.iloc[row_index]["ID"]
    details = retrieve_document(doc_id)  # (message, analysis, metadata JSON, file path)
    # The detail values are followed by the tab switch and by the ID that is
    # kept in gr.State for the delete button.
    return (*details, gr.Tabs(selected=2), doc_id)
def retrieve_document(doc_id):
    """
    Fetches the detailed analysis results, technical metadata, and the original document for a given ID.

    Args:
        doc_id (str): The unique identifier of the document analysis to retrieve.

    Returns:
        tuple: (status message, analysis text, metadata as a JSON string,
        path of the stored file). Entries that are unavailable are None; when
        the stored file is missing the metadata is "{}" and the path is None.
    """
    if not doc_id:
        return "Enter ID", None, None, None
    doc = vector_db.get_document(doc_id)
    if not doc:
        return f"Not found: {doc_id}", None, None, None

    meta = doc['metadata']
    # Fallback for historical 'gemini_analysis' key
    analysis = meta.get('llm_analysis', meta.get('gemini_analysis', "No analysis."))
    file_path = meta.get('file_path')
    # A record without a stored path must count as "file missing":
    # os.path.exists(None) raises TypeError instead of returning False.
    if not file_path or not os.path.exists(file_path):
        return f"Error: File missing at {file_path}", analysis, "{}", None

    # Keep the long analysis text and the server-side path out of the metadata view.
    hidden_keys = ('llm_analysis', 'gemini_analysis', 'file_path')
    display_meta = {k: v for k, v in meta.items() if k not in hidden_keys}
    return f"Retrieved: {meta.get('filename')}", analysis, json.dumps(display_meta, indent=2), file_path
# Custom stylesheet for the app; it is handed to Gradio through
# demo.launch(css=css). The .container and .help-card classes are attached to
# components via elem_classes; .footer-links is used by the footer HTML.
css = """
body { background-color: #f0f2f5; font-family: 'Inter', sans-serif; }
.container { max-width: 1000px; margin: auto; padding: 20px; }
.header { text-align: center; margin-bottom: 40px; }
.result-box { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
.footer-links { text-align: center; padding: 20px; border-top: 1px solid #e2e8f0; margin-top: 40px; }
.footer-links a { margin: 0 15px; text-decoration: none; color: #4f46e5; font-weight: 600; }
.help-card { background: white; padding: 2rem; border-radius: 15px; border-left: 5px solid #4f46e5; margin-bottom: 1rem; }
"""
# UI definition: four tabs (new analysis, history, document detail, help),
# a footer, and the event wiring. Several labels had been saved with their
# emoji/arrow bytes decoded in the wrong encoding; the intended characters
# are restored here.
with gr.Blocks() as demo:
    gr.Markdown("# 🛡️ Documentary Fraud & History Explorer")
    with gr.Tabs() as main_tabs:
        # Tab 0: upload a document and show the analysis result.
        with gr.TabItem("New Analysis", id=0):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(label="Upload Document")
                    submit_btn = gr.Button("🔍 Analyze", variant="primary")
                with gr.Column(scale=2):
                    dup_output = gr.Textbox(label="Status", interactive=False)
                    preview_input = gr.File(label="Document Preview", interactive=False)
                    with gr.Tabs():
                        with gr.TabItem("Analysis Result"):
                            analysis_output = gr.Markdown()
                        with gr.TabItem("Technical Data"):
                            meta_output = gr.Code(language="json")

        # Tab 1: table of every stored analysis; clicking a row opens tab 2.
        with gr.TabItem("History Overview", id=1):
            history_table = gr.Dataframe(
                value=get_history_df(),
                headers=["ID", "Label", "Amount", "Fraud Score"],
                interactive=False,
                label="Click a row to view details"
            )
            selected_id_state = gr.State("")  # To store the ID to delete
            with gr.Row():
                refresh_btn = gr.Button("🔄 Refresh List")
                delete_btn = gr.Button("🗑️ Delete Selected Analysis", variant="stop")
            delete_status = gr.Textbox(label="Deletion Status", interactive=False)

        # Tab 2: details of one stored analysis, looked up by ID.
        with gr.TabItem("Document Detail", id=2):
            with gr.Row():
                search_id = gr.Textbox(label="Document ID")
                search_btn = gr.Button("🔎 View Details")
            detail_msg = gr.Textbox(label="Status", interactive=False)
            with gr.Row():
                with gr.Column(scale=1):
                    detail_preview = gr.File(label="Preview / Download")
                with gr.Column(scale=2):
                    detail_analysis = gr.Markdown()
                    detail_meta = gr.Code(language="json")

        # Tab 3: links to the static support and legal pages.
        with gr.TabItem("Help & Legal", id=3):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## 🐒 Fraudoo Support & Legal")
                with gr.Row():
                    with gr.Column(elem_classes="help-card"):
                        gr.Markdown("### 📧 Support\nNeed assistance? Our support team is ready to help.")
                        gr.HTML('<a href="/static/support.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Open Support Page →</a>')
                    with gr.Column(elem_classes="help-card"):
                        gr.Markdown("### ⚖️ Legal\nReview our terms and how we protect your data.")
                        gr.HTML('<a href="/static/privacy.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Privacy Policy</a>')
                        gr.HTML('<br><a href="/static/terms.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Terms of Service</a>')

    # Footer shown below the tabs.
    gr.HTML("""
<div class="footer-links">
<a href="/static/support.html" target="_blank">Support</a>
<a href="/static/privacy.html" target="_blank">Privacy</a>
<a href="/static/terms.html" target="_blank">Terms</a>
<span style="color: #64748b; margin-left: 20px;">© 2026 Fraudoo 🐒</span>
</div>
""")

    # Events
    # Analysing also drops the new ID into the detail tab's search box and
    # refreshes the history table.
    submit_btn.click(
        fn=process_document,
        inputs=[file_input],
        outputs=[dup_output, analysis_output, meta_output, search_id, preview_input, history_table]
    )
    search_btn.click(
        fn=retrieve_document,
        inputs=[search_id],
        outputs=[detail_msg, detail_analysis, detail_meta, detail_preview]
    )
    # Selecting a row fills the detail tab, switches to it and remembers the
    # selected ID for the delete button.
    history_table.select(
        fn=on_select_history,
        inputs=[history_table],
        outputs=[detail_msg, detail_analysis, detail_meta, detail_preview, main_tabs, selected_id_state]
    )
    delete_btn.click(
        fn=delete_analysis,
        inputs=[selected_id_state],
        outputs=[delete_status, history_table]
    )
    refresh_btn.click(fn=get_history_df, outputs=[history_table])
if __name__ == "__main__":
    # allowed_paths names the static and upload directories so Gradio may
    # serve the files stored in them.
    launch_options = {
        "mcp_server": True,
        "theme": gr.themes.Soft(),
        "css": css,
        "allowed_paths": [STATIC_DIR, UPLOAD_DIR],
    }
    demo.launch(**launch_options)