Spaces:

obaes
/

fraudoo

Sleeping

App Files Files Community

fraudoo / app.py

obaes

Upload 10 files

01649f1 verified 2 months ago

raw

history blame contribute delete

11.1 kB

	import gradio as gr
	import os
	from fraud_analyzer import FraudAnalyzer
	from vector_service import VectorService
	import json
	import uuid
	import pandas as pd
	import re
	import shutil

	# Initialize
	# The analyzer is only constructed when GOOGLE_API_KEY is set; otherwise it
	# stays None and process_document() returns an error message instead.
	API_KEY = os.environ.get("GOOGLE_API_KEY")
	analyzer = FraudAnalyzer(API_KEY) if API_KEY else None
	vector_db = VectorService()
	# Absolute paths, resolved against the working directory at import time.
	# UPLOAD_DIR receives the copies made by process_document(); the Help & Legal
	# links point at /static/*.html.
	UPLOAD_DIR = os.path.abspath("./uploads")
	STATIC_DIR = os.path.abspath("./static")
	os.makedirs(UPLOAD_DIR, exist_ok=True)
	os.makedirs(STATIC_DIR, exist_ok=True)

	# Serve static files natively through Gradio
	# NOTE(review): "uploads/" is registered as a static path too, so analyzed
	# documents are presumably reachable by URL — confirm this exposure is intended.
	gr.set_static_paths(paths=["static/", "uploads/"])

	def parse_flash_metrics(analysis_text):
	    """Extract the label, amount and fraud score from the model's reply.

	    The reply may contain a JSON object wrapped in a Markdown ```json fence.
	    When such a fence is found and its payload is valid JSON, the known
	    fields are taken from it. Otherwise each field is searched for
	    individually with a regular expression, inside the fenced payload when
	    there was one and in the whole text when there was not.

	    Args:
	        analysis_text (str): Raw text returned by the analysis model.

	    Returns:
	        dict: Keys "label", "amount" and "fraud_score", all string values.
	        Fields that cannot be found keep their defaults ("Unknown", "0",
	        "0"). Non-string input yields the defaults.
	    """
	    metrics = {"label": "Unknown", "amount": "0", "fraud_score": "0"}
	    if not isinstance(analysis_text, str):
	        return metrics

	    search_text = analysis_text
	    # Any whitespace may follow the fence tag and the object may span several
	    # lines; the lazy match stops at the brace that precedes the closing fence.
	    fenced = re.search(r"```json\s*(\{.*?\})\s*```", analysis_text, re.DOTALL)
	    if fenced:
	        payload = fenced.group(1)
	        try:
	            data = json.loads(payload)
	        except (ValueError, RecursionError):
	            # Malformed JSON: fall back to field-by-field matching, but only
	            # inside the fenced payload.
	            search_text = payload
	        else:
	            # JSON null keeps the default instead of becoming the text "None".
	            metrics.update(
	                {k: str(v) for k, v in data.items() if k in metrics and v is not None}
	            )
	            return metrics

	    # Fallback to regex search for individual fields
	    field_patterns = {
	        "label": r'"label":\s*"([^"]+)"',
	        "amount": r'"amount":\s*"?([^",\s]+)"?',
	        "fraud_score": r'"fraud_score":\s*"?(\d+)"?',
	    }
	    for field, pattern in field_patterns.items():
	        found = re.search(pattern, search_text)
	        if found:
	            metrics[field] = found.group(1)
	    return metrics

	def process_document(file_path):
	    """
	    Analyzes an uploaded document for fraud and records the result in the history.
	    Copies the file into the upload directory, checks it for duplicates, runs the
	    analyzer on it, and stores the analysis with its parsed label, amount and
	    fraud score in the vector database.

	    Args:
	        file_path (str): The local path to the document file (Image or PDF) to be analyzed.

	    Returns:
	        tuple: (status message, analysis text, metadata as a JSON string,
	        document ID, path of the stored copy, refreshed history table).
	        When the API key or the file is missing, only the status message
	        and the history table are set; the other entries are None.
	    """
	    if not API_KEY:
	        return "Error: GOOGLE_API_KEY not set.", None, None, None, None, get_history_df()
	    if not file_path:
	        return "Please upload a document.", None, None, None, None, get_history_df()

	    filename = os.path.basename(file_path)
	    # Keep our own copy under UPLOAD_DIR so the stored record points at a file
	    # this app controls; the random prefix avoids clashes between equal names.
	    persistent_path = os.path.join(UPLOAD_DIR, f"{str(uuid.uuid4())[:8]}_{filename}")
	    shutil.copy(file_path, persistent_path)

	    # The duplicate check runs before this document is added to the database.
	    dup_result = vector_db.find_duplicates(persistent_path)
	    dup_msg = "No duplicates found."
	    if dup_result:
	        dup_msg = f"⚠️ DUPLICATE DETECTED: {dup_result['type']}"

	    result = analyzer.analyze_document(persistent_path)
	    llm_analysis = result['llm_analysis']
	    metrics = parse_flash_metrics(llm_analysis)

	    doc_id = str(uuid.uuid4())[:8]

	    score_val = metrics.get('fraud_score', '0')
	    formatted_score = f"{score_val}/100"

	    # Enrich a copy so the analyzer's own result dict is left untouched.
	    meta = dict(result['metadata'])
	    meta['llm_analysis'] = llm_analysis
	    meta['filename'] = filename
	    meta['label'] = metrics['label']
	    meta['amount'] = metrics['amount']
	    meta['fraud_score'] = formatted_score
	    meta['file_path'] = persistent_path

	    # Values are stored as strings and None entries are dropped.
	    stored_meta = {k: str(v) for k, v in meta.items() if v is not None}
	    vector_db.add_document(persistent_path, doc_id, metadata=stored_meta)

	    return (
	        f"ID: {doc_id} | {dup_msg}",
	        llm_analysis,
	        json.dumps(meta, indent=2),
	        doc_id,
	        persistent_path,
	        get_history_df(),
	    )

	def get_history_df():
	    """
	    Retrieves the complete history of analyzed documents from the vector database.

	    Returns:
	        pandas.DataFrame: One row per stored document with the columns
	        "ID", "Label", "Amount" and "Fraud Score". The frame is empty when
	        nothing is stored. Scores are always shown as "<score>/100".
	    """
	    columns = ["ID", "Label", "Amount", "Fraud Score"]

	    docs = vector_db.collection.get()
	    if not docs or not docs['ids']:
	        return pd.DataFrame(columns=columns)

	    ids = docs['ids']
	    # NOTE(review): presumably the store can hand back no metadata at all, or
	    # None for a single record — both are treated as "no fields"; verify
	    # against VectorService.
	    metadatas = docs['metadatas'] or [None] * len(ids)

	    rows = []
	    for doc_id, meta in zip(ids, metadatas):
	        meta = meta or {}
	        score = meta.get('fraud_score', '0')
	        # Scores saved without the "/100" suffix get it added for display.
	        if "/" not in str(score):
	            score = f"{score}/100"
	        rows.append([
	            doc_id,
	            meta.get('label', 'Unknown'),
	            meta.get('amount', '0'),
	            score,
	        ])
	    return pd.DataFrame(rows, columns=columns)

	def delete_analysis(doc_id):
	    """
	    Deletes a specific fraud analysis record and its stored document copy using its unique ID.

	    Args:
	        doc_id (str): The unique identifier of the analysis record to be deleted.

	    Returns:
	        tuple: (status message, refreshed history table).
	    """
	    if not doc_id:
	        return "Please select an analysis to delete first.", get_history_df()

	    # Read the stored file path first: it is only known through the record
	    # that is about to be deleted.
	    doc = vector_db.get_document(doc_id)
	    stored_path = doc['metadata'].get('file_path') if doc else None

	    vector_db.delete_document(doc_id)

	    if stored_path:
	        stored_path = os.path.abspath(stored_path)
	        # Only remove files that sit directly in UPLOAD_DIR, which is where
	        # process_document() puts its copies; never touch paths elsewhere.
	        if os.path.dirname(stored_path) == UPLOAD_DIR and os.path.isfile(stored_path):
	            try:
	                os.remove(stored_path)
	            except OSError as e:
	                # The record is already gone; a leftover file is reported
	                # but must not fail the deletion.
	                print(f"Could not remove {stored_path}: {e}")

	    return f"Successfully deleted ID: {doc_id}", get_history_df()

	def on_select_history(evt: gr.SelectData, df):
	    """Open the clicked history row in the Document Detail tab.

	    Args:
	        evt (gr.SelectData): Selection event; evt.index[0] is the clicked row.
	        df: Current contents of the history table.

	    Returns:
	        tuple: The four values from retrieve_document(), a gr.Tabs update
	        selecting tab id 2, and the selected document ID (kept in gr.State
	        for the delete button).
	    """
	    row_number = evt.index[0]
	    selected_id = df.iloc[row_number]["ID"]
	    status, analysis_text, meta_json, preview_path = retrieve_document(selected_id)
	    return (
	        status,
	        analysis_text,
	        meta_json,
	        preview_path,
	        gr.Tabs(selected=2),
	        selected_id,
	    )

	def retrieve_document(doc_id):
	    """
	    Fetches the detailed analysis results, technical metadata, and the original document for a given ID.

	    Args:
	        doc_id (str): The unique identifier of the document analysis to retrieve.

	    Returns:
	        tuple: (status message, analysis text, metadata as a JSON string,
	        path of the stored document). For an empty or unknown ID only the
	        status message is set. When the stored file is missing, the
	        analysis text is still returned, the metadata is "{}" and the path
	        is None.
	    """
	    if not doc_id:
	        return "Enter ID", None, None, None

	    doc = vector_db.get_document(doc_id)
	    if not doc:
	        return f"Not found: {doc_id}", None, None, None

	    meta = doc['metadata']
	    # Fallback for historical 'gemini_analysis' key
	    analysis = meta.get('llm_analysis', meta.get('gemini_analysis', "No analysis."))
	    file_path = meta.get('file_path')

	    # A record without a stored path takes the same branch as a vanished
	    # file; os.path.exists(None) would raise TypeError.
	    if not file_path or not os.path.exists(file_path):
	        return f"Error: File missing at {file_path}", analysis, "{}", None

	    # The analysis text has its own output and the file path is internal, so
	    # neither is repeated in the metadata view.
	    hidden_keys = ('llm_analysis', 'gemini_analysis', 'file_path')
	    display_meta = {k: v for k, v in meta.items() if k not in hidden_keys}
	    return (
	        f"Retrieved: {meta.get('filename')}",
	        analysis,
	        json.dumps(display_meta, indent=2),
	        file_path,
	    )

	# Custom stylesheet, passed to demo.launch(css=css) at the bottom of the file.
	# ".container" and ".help-card" are attached through elem_classes in the
	# Help & Legal tab and ".footer-links" is used by the footer HTML; ".header"
	# and ".result-box" are not referenced anywhere in this file.
	css = """
	body { background-color: #f0f2f5; font-family: 'Inter', sans-serif; }
	.container { max-width: 1000px; margin: auto; padding: 20px; }
	.header { text-align: center; margin-bottom: 40px; }
	.result-box { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
	.footer-links { text-align: center; padding: 20px; border-top: 1px solid #e2e8f0; margin-top: 40px; }
	.footer-links a { margin: 0 15px; text-decoration: none; color: #4f46e5; font-weight: 600; }
	.help-card { background: white; padding: 2rem; border-radius: 15px; border-left: 5px solid #4f46e5; margin-bottom: 1rem; }
	"""

	# UI definition: four tabs (New Analysis, History Overview, Document Detail,
	# Help & Legal), a footer, and the event wiring that connects the widgets to
	# the handler functions defined earlier in this file.
	with gr.Blocks() as demo:
	gr.Markdown("# 🛡️ Documentary Fraud & History Explorer")

	with gr.Tabs() as main_tabs:
	# Tab 0: upload a document and run a new analysis.
	with gr.TabItem("New Analysis", id=0):
	with gr.Row():
	with gr.Column(scale=1):
	file_input = gr.File(label="Upload Document")
	submit_btn = gr.Button("🔍 Analyze", variant="primary")
	with gr.Column(scale=2):
	dup_output = gr.Textbox(label="Status", interactive=False)
	preview_input = gr.File(label="Document Preview", interactive=False)

	# Result panes: the analysis text as Markdown and the metadata as JSON.
	with gr.Tabs():
	with gr.TabItem("Analysis Result"):
	analysis_output = gr.Markdown()
	with gr.TabItem("Technical Data"):
	meta_output = gr.Code(language="json")

	# Tab 1: table of all stored analyses, filled from get_history_df() when
	# the UI is built; clicking a row triggers on_select_history.
	with gr.TabItem("History Overview", id=1):
	history_table = gr.Dataframe(
	value=get_history_df(),
	headers=["ID", "Label", "Amount", "Fraud Score"],
	interactive=False,
	label="Click a row to view details"
	)
	selected_id_state = gr.State("") # ID of the last clicked row; input to the delete button
	with gr.Row():
	refresh_btn = gr.Button("🔄 Refresh List")
	delete_btn = gr.Button("🗑️ Delete Selected Analysis", variant="stop")
	delete_status = gr.Textbox(label="Deletion Status", interactive=False)

	# Tab 2: look up one stored analysis by ID. This is the tab that
	# on_select_history switches to with gr.Tabs(selected=2).
	with gr.TabItem("Document Detail", id=2):
	with gr.Row():
	search_id = gr.Textbox(label="Document ID")
	search_btn = gr.Button("🔎 View Details")

	detail_msg = gr.Textbox(label="Status", interactive=False)
	with gr.Row():
	with gr.Column(scale=1):
	detail_preview = gr.File(label="Preview / Download")
	with gr.Column(scale=2):
	detail_analysis = gr.Markdown()
	detail_meta = gr.Code(language="json")

	# Tab 3: links to the support and legal pages under /static.
	with gr.TabItem("Help & Legal", id=3):
	with gr.Column(elem_classes="container"):
	gr.Markdown("## 🐢 Fraudoo Support & Legal")

	with gr.Row():
	with gr.Column(elem_classes="help-card"):
	gr.Markdown("### 📧 Support\nNeed assistance? Our support team is ready to help.")
	gr.HTML('<a href="/static/support.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Open Support Page →</a>')

	with gr.Column(elem_classes="help-card"):
	gr.Markdown("### ⚖️ Legal\nReview our terms and how we protect your data.")
	gr.HTML('<a href="/static/privacy.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Privacy Policy</a>')
	gr.HTML('<br><a href="/static/terms.html" target="_blank" style="color: #4f46e5; font-weight: bold;">Terms of Service</a>')

	# Footer repeating the support and legal links.
	gr.HTML("""
	<div class="footer-links">
	<a href="/static/support.html" target="_blank">Support</a>
	<a href="/static/privacy.html" target="_blank">Privacy</a>
	<a href="/static/terms.html" target="_blank">Terms</a>
	<span style="color: #64748b; margin-left: 20px;">© 2026 Fraudoo 🐢</span>
	</div>
	""")

	# Events
	# The order of each outputs list must match the order of the values the
	# handler returns.

	# Analyze: shows status, analysis and metadata, writes the new document ID
	# into the Document Detail search box, previews the stored copy and
	# refreshes the history table.
	submit_btn.click(
	fn=process_document,
	inputs=[file_input],
	outputs=[dup_output, analysis_output, meta_output, search_id, preview_input, history_table]
	)

	# Manual lookup by the ID typed into the search box.
	search_btn.click(
	fn=retrieve_document,
	inputs=[search_id],
	outputs=[detail_msg, detail_analysis, detail_meta, detail_preview]
	)

	# Row click: fills the detail widgets, switches main_tabs to the detail tab
	# and remembers the clicked ID in selected_id_state.
	history_table.select(
	fn=on_select_history,
	inputs=[history_table],
	outputs=[detail_msg, detail_analysis, detail_meta, detail_preview, main_tabs, selected_id_state]
	)

	# Deletes the analysis whose ID was remembered by the last row click, then
	# refreshes the table.
	delete_btn.click(
	fn=delete_analysis,
	inputs=[selected_id_state],
	outputs=[delete_status, history_table]
	)

	# Reloads the table from the vector database.
	refresh_btn.click(fn=get_history_df, outputs=[history_table])

	if __name__ == "__main__":
	    # Start the app only when this file is run as a script. The static and
	    # upload directories were already created at import time; here they are
	    # passed as allowed_paths for the running app.
	    demo.launch(
	        allowed_paths=[STATIC_DIR, UPLOAD_DIR],
	        css=css,
	        theme=gr.themes.Soft(),
	        mcp_server=True,
	    )