Spaces:

spunteam
/

api-web-crawler

Build error

App Files Files Community

api-web-crawler / app /util /schengen_visa_letter_generator.py

mrfirdauss

fix document filling

80ffa08 5 months ago

raw

history blame contribute delete

6.67 kB

	import os
	import logging
	import json
	import uuid
	import requests
	import pandas as pd
	import datetime
	from fpdf import FPDF
	from pydantic import BaseModel

	# Local imports
	from .pdf_document_generator import PDFDocumentGenerator
	from .db_utils import DBManager
	from .schema import SchengenVisaData # Using the schema from file 1

	class SchengenVisaLetterGenerator(PDFDocumentGenerator):
	    """Generate a Schengen visa cover letter PDF, optionally prefilled by an AI client.

	    ``get_prefill_data`` gathers application data from the database, asks the
	    configured AI client to map it onto ``SchengenVisaData`` and reports token
	    usage. ``build_document`` renders the letter from ``self.data``
	    (presumably stored by ``PDFDocumentGenerator.__init__`` -- confirm against
	    the base class).
	    """

	    def __init__(self, data: dict, genai_client=None):
	        """Initialise the generator.

	        Args:
	            data: Letter fields. ``get_prefill_data`` reads ``application_id``
	                from it; ``build_document`` reads the remaining keys.
	            genai_client: Optional client exposing ``sync_formated_prompt``.
	                Without it ``get_prefill_data`` returns an error dict.
	        """
	        super().__init__(data)
	        self.logger = logging.getLogger(__name__)
	        self.genai_client = genai_client
	        self.db = DBManager()

	    def _get_day_suffix(self, day):
	        """Return the English ordinal suffix ("st", "nd", "rd" or "th") for ``day``."""
	        # 11, 12 and 13 are irregular: they take "th" even though they end in
	        # 1, 2 and 3. Checking ``% 100`` keeps this correct for any integer,
	        # not only calendar days.
	        if 11 <= day % 100 <= 13:
	            return "th"
	        return {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")

	    def _record_usage(self, usage_data: dict, application_id: int, parsed_result: dict):
	        """Report token usage to the core API (best effort, never raises).

	        Args:
	            usage_data: Mapping whose ``input`` / ``output`` entries are sent as
	                the token amounts (missing entries are sent as 0).
	            application_id: Sent in the request ``meta``.
	            parsed_result: Sent as the request ``history``.
	        """
	        worker_api_key = os.getenv("WORKER_API_KEY")
	        api_url = os.getenv("CORE_API_URL", "https://api-dev.spun.global")

	        # Nothing to do when the worker is not configured for usage reporting.
	        if not worker_api_key or not api_url:
	            return

	        headers = {"X-WORKER-Api-Key": worker_api_key, "Content-Type": "application/json"}
	        body = {
	            "external_id": str(uuid.uuid4()),
	            "history": parsed_result,
	            "smart_type": "visa_form_filler",
	            "provider_type": "google_ai_studio",
	            "total_input_amount": usage_data.get('input', 0),
	            "total_output_amount": usage_data.get('output', 0),
	            "meta": {"application_id": application_id},
	        }
	        try:
	            response = requests.post(
	                f"{api_url}/v1/smart/token/record-usage",
	                headers=headers,
	                json=body,
	                timeout=2.0,
	            )
	            # Surface 4xx/5xx answers in the log instead of dropping them.
	            response.raise_for_status()
	        except Exception as e:
	            # Deliberately broad: usage reporting must never break the caller.
	            self.logger.error("Failed to record token usage: %s", e)

	    def _build_context_string(self, profile_name, destination, form_df, smart_df):
	        """Assemble the raw-data section of the AI prompt.

	        Args:
	            profile_name: Inserted after "Profile Name:".
	            destination: Inserted after "Target Destination:".
	            form_df: DataFrame read through its ``document_name`` and ``value``
	                columns, one output line per row.
	            smart_df: DataFrame read through its ``doc_type`` and
	                ``extraction_result`` columns, one output line per row.

	        Returns:
	            The sections joined with newlines.
	        """
	        context = [
	            f"--- CONTEXT ---\nProfile Name: {profile_name}\nTarget Destination: {destination}",
	            "\n--- USER FORM ANSWERS ---",
	        ]
	        if not form_df.empty:
	            for _, row in form_df.iterrows():
	                context.append(f"{row['document_name']}: {row['value']}")

	        context.append("\n--- EXTRACTED DOCUMENTS (OCR) ---")
	        if not smart_df.empty:
	            for _, row in smart_df.iterrows():
	                try:
	                    raw = row['extraction_result']
	                    text_dump = json.dumps(raw) if isinstance(raw, dict) else str(raw)
	                    context.append(f"Doc Type: {row['doc_type']} | Data: {text_dump}")
	                except Exception as exc:
	                    # One unreadable row must not discard the rest of the
	                    # context, but it should leave a trace in the log.
	                    self.logger.warning("Skipping unreadable extraction row: %s", exc)
	        return "\n".join(context)

	    def get_prefill_data(self) -> dict:
	        """Ask the AI client to map stored application data onto the letter schema.

	        Returns:
	            The client's ``parsed`` result (an empty dict when absent), or a
	            dict with a single ``error`` key when the application id or the AI
	            client is missing, or when any step raises.
	        """
	        application_id = self.data.get("application_id")
	        if not application_id:
	            return {"error": "Application ID required"}
	        # Fail fast: without a client the database reads below are wasted work.
	        if not self.genai_client:
	            return {"error": "AI Client not initialized"}

	        try:
	            # 1. Fetch raw data
	            profile_name = self.db.get_profile_name_by_app_id(application_id)
	            destination = self.db.get_destination_country(application_id)
	            form_df = self.db.get_form_data(application_id)
	            smart_df = self.db.get_smart_upload_results(application_id)

	            # 2. Build prompt
	            context_data = self._build_context_string(profile_name, destination, form_df, smart_df)
	            prompt = f"""
	You are a Visa Application Data Processor. Map the raw data below to a Schengen Visa Letter JSON.
	RULES:
	1. Trust OCR data over Manual Form data if conflicting.
	2. Dates MUST be 'DD Month YYYY'.
	3. Calculate duration from flight dates if needed.
	4. Use placeholder strings if data is missing.
	RAW DATA:
	{context_data}
	"""

	            # 3. Synchronous AI call
	            response = self.genai_client.sync_formated_prompt(prompt, response_schema=SchengenVisaData)
	            # A present-but-None "parsed" entry is normalised to an empty dict
	            # so the declared return type holds.
	            parsed = response.get("parsed") or {}

	            # 4. Record usage
	            usage = response.get("usage")
	            if usage:
	                self._record_usage(usage, application_id, parsed)

	            return parsed
	        except Exception as e:
	            self.logger.error("Prefill failed: %s", e, exc_info=True)
	            return {"error": str(e)}

	    def build_document(self, pdf: FPDF):
	        """Render the visa letter onto ``pdf`` from ``self.data``.

	        Args:
	            pdf: The FPDF document to write to.

	        Returns:
	            The same ``pdf`` object, after the letter has been written.
	        """
	        data = self.data
	        # ``or {}`` also covers an explicit None stored under the key.
	        personal = data.get("personal_details") or {}
	        if hasattr(personal, 'model_dump'):
	            personal = personal.model_dump()  # Handle Pydantic objects

	        # Letter date, e.g. "5th March 2024". The day is formatted by hand
	        # because "%d" zero-pads and would give "05th".
	        today = datetime.date.today()
	        suffix = self._get_day_suffix(today.day)
	        date_str = f"{today.day}{suffix} {today.strftime('%B %Y')}"

	        city = data.get("city", "Jakarta")
	        country = data.get("country", "[Country]")

	        # Header
	        pdf.set_font("Times", "", 12)
	        pdf.multi_cell(0, 8, f"{city}, {date_str}")
	        pdf.multi_cell(0, 8, f"To: Embassy of {country} Jakarta")
	        pdf.multi_cell(0, 8, "Subject: Application for Schengen Visa")
	        pdf.ln(8)

	        body = (
	            f"Dear Sir/Madam,\n\nI am writing to apply for a Schengen {data.get('purpose', 'visa')} "
	            f"to visit {data.get('main_dest', 'Europe')} on {data.get('event', 'vacation')}. "
	            f"Travel dates: {data.get('start', '')} to {data.get('end', '')} ({data.get('duration', '')})."
	        )
	        pdf.multi_cell(0, 8, body)
	        pdf.ln(8)

	        # Personal details
	        pdf.set_font("Times", "B", 12)
	        pdf.cell(0, 10, "Personal Details:", ln=True)
	        pdf.set_font("Times", "", 12)

	        fields = [
	            ("Name", personal.get("name", "")),
	            ("DOB", personal.get("dob", "")),
	            ("Nationality", personal.get("nationality", "")),
	            ("Passport", personal.get("passport_number", "")),
	            ("Occupation", personal.get("occupation", "")),
	        ]
	        for label, val in fields:
	            pdf.cell(45, 8, label, align="L")
	            pdf.multi_cell(0, 8, f": {val}")

	        # Closing
	        pdf.ln(8)
	        pdf.multi_cell(0, 8, f"Commitment: {data.get('job_commitment', '')}")
	        pdf.ln(4)
	        pdf.multi_cell(0, 8, f"Financial Status: {data.get('financial_status', 'sound')}")
	        pdf.ln(8)
	        pdf.multi_cell(0, 8, "Thank you,\n\n")
	        # A missing or None name becomes an empty signature line rather than
	        # a non-string argument to multi_cell.
	        pdf.multi_cell(0, 8, str(personal.get("name") or ""))

	        return pdf