# api-web-crawler/app/util/schengen_visa_letter_generator.py
# mrfirdauss's picture
# fix document filling
# 80ffa08
import os
import logging
import json
import uuid
import requests
import pandas as pd
import datetime
from fpdf import FPDF
from pydantic import BaseModel
# Local imports
from .pdf_document_generator import PDFDocumentGenerator
from .db_utils import DBManager
from .schema import SchengenVisaData # Using the schema from file 1
class SchengenVisaLetterGenerator(PDFDocumentGenerator):
    """Build a Schengen visa cover letter and AI-prefilled data for it.

    The generator has two entry points:

    * ``get_prefill_data`` gathers form answers and OCR results for an
      application from the database and asks the GenAI client to map them
      onto the ``SchengenVisaData`` schema.
    * ``build_document`` renders the letter onto an ``FPDF`` instance from
      ``self.data``.

    NOTE(review): ``self.data`` is read throughout but never assigned here;
    it is presumably set by ``PDFDocumentGenerator.__init__`` -- confirm.
    """

    def __init__(self, data: dict, genai_client=None):
        """Store the letter data and collaborators.

        Args:
            data: Letter payload, forwarded to the base class constructor.
            genai_client: Optional client exposing ``sync_formated_prompt``;
                required only by ``get_prefill_data``.
        """
        super().__init__(data)
        self.logger = logging.getLogger(__name__)
        self.genai_client = genai_client
        self.db = DBManager()

    def _get_day_suffix(self, day):
        """Return the English ordinal suffix for ``day`` ("st"/"nd"/"rd"/"th").

        Uses the general ordinal rule (11-13 always take "th"), so it is
        correct for any non-negative integer, not only days of the month.
        """
        if 11 <= day % 100 <= 13:
            return "th"
        return {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")

    def _record_usage(self, usage_data: dict, application_id: int, parsed_result: dict):
        """Report token usage to the core API on a best-effort basis.

        Does nothing when ``WORKER_API_KEY`` is unset or ``CORE_API_URL`` is
        empty. Network failures and non-success responses are logged and
        never raised, so usage reporting cannot break the caller.

        Args:
            usage_data: Mapping with optional ``input`` / ``output`` counts.
            application_id: Application the usage belongs to.
            parsed_result: Parsed AI result, sent as the ``history`` field.
        """
        worker_api_key = os.getenv("WORKER_API_KEY")
        api_url = os.getenv("CORE_API_URL", "https://api-dev.spun.global")
        if not worker_api_key or not api_url:
            return

        headers = {"X-WORKER-Api-Key": worker_api_key, "Content-Type": "application/json"}
        body = {
            "external_id": str(uuid.uuid4()),
            "history": parsed_result,
            "smart_type": "visa_form_filler",
            "provider_type": "google_ai_studio",
            "total_input_amount": usage_data.get('input', 0),
            "total_output_amount": usage_data.get('output', 0),
            "meta": {"application_id": application_id},
        }
        try:
            response = requests.post(
                f"{api_url}/v1/smart/token/record-usage",
                headers=headers,
                json=body,
                timeout=2.0,
            )
            # A rejected request is still a failure to record usage; surface
            # it in the logs instead of dropping it silently.
            if not response.ok:
                self.logger.error(
                    f"Failed to record token usage: HTTP {response.status_code}"
                )
        except Exception as e:
            # Deliberately broad: this is a fire-and-forget side channel.
            self.logger.error(f"Failed to record token usage: {e}")

    def _build_context_string(self, profile_name, destination, form_df, smart_df):
        """Assemble the plain-text context block embedded in the AI prompt.

        Args:
            profile_name: Applicant profile name.
            destination: Target destination of the application.
            form_df: Frame of form answers; rows are read via the
                ``document_name`` and ``value`` columns. ``None`` is treated
                as empty.
            smart_df: Frame of extraction results; rows are read via the
                ``doc_type`` and ``extraction_result`` columns. ``None`` is
                treated as empty.

        Returns:
            The context sections joined with newlines.
        """
        context = [
            f"--- CONTEXT ---\nProfile Name: {profile_name}\nTarget Destination: {destination}",
            "\n--- USER FORM ANSWERS ---",
        ]
        if form_df is not None and not form_df.empty:
            for _, row in form_df.iterrows():
                context.append(f"{row['document_name']}: {row['value']}")

        context.append("\n--- EXTRACTED DOCUMENTS (OCR) ---")
        if smart_df is not None and not smart_df.empty:
            for _, row in smart_df.iterrows():
                try:
                    raw = row['extraction_result']
                    text_dump = json.dumps(raw) if isinstance(raw, dict) else str(raw)
                    context.append(f"Doc Type: {row['doc_type']} | Data: {text_dump}")
                except Exception as e:
                    # One malformed row must not abort the whole prompt, but
                    # it should leave a trace instead of vanishing.
                    self.logger.warning(f"Skipping unreadable extraction row: {e}")
        return "\n".join(context)

    def get_prefill_data(self) -> dict:
        """Ask the AI client to map stored application data to letter fields.

        Reads ``application_id`` from ``self.data``, loads the profile name,
        destination, form answers and extraction results from the database,
        and sends them to ``genai_client.sync_formated_prompt`` with
        ``SchengenVisaData`` as the response schema. Token usage, when
        reported by the client, is recorded via ``_record_usage``.

        Returns:
            The parsed result from the AI client (``{}`` when it reports
            none), or ``{"error": <message>}`` when the application id or
            the AI client is missing or any step raises.
        """
        application_id = self.data.get("application_id")
        if not application_id:
            return {"error": "Application ID required"}
        # Check before touching the database: without a client the fetched
        # data could not be used anyway.
        if not self.genai_client:
            return {"error": "AI Client not initialized"}

        try:
            # 1. Fetch raw data
            profile_name = self.db.get_profile_name_by_app_id(application_id)
            destination = self.db.get_destination_country(application_id)
            form_df = self.db.get_form_data(application_id)
            smart_df = self.db.get_smart_upload_results(application_id)

            # 2. Build prompt
            context_data = self._build_context_string(profile_name, destination, form_df, smart_df)
            prompt = (
                "\n"
                "You are a Visa Application Data Processor. Map the raw data below "
                "to a Schengen Visa Letter JSON.\n"
                "RULES:\n"
                "1. Trust OCR data over Manual Form data if conflicting.\n"
                "2. Dates MUST be 'DD Month YYYY'.\n"
                "3. Calculate duration from flight dates if needed.\n"
                "4. Use placeholder strings if data is missing.\n"
                "RAW DATA:\n"
                f"{context_data}\n"
            )

            # 3. Synchronous AI call
            response = self.genai_client.sync_formated_prompt(prompt, response_schema=SchengenVisaData)
            # Normalise an explicit ``None`` so the declared return type holds.
            parsed = response.get("parsed") or {}

            # 4. Record usage
            usage = response.get("usage")
            if usage:
                self._record_usage(usage, application_id, parsed)
            return parsed
        except Exception as e:
            self.logger.error(f"Prefill failed: {e}", exc_info=True)
            return {"error": str(e)}

    def build_document(self, pdf: FPDF):
        """Render the visa cover letter onto ``pdf`` from ``self.data``.

        The data may come straight from the AI prefill or have been edited
        by the user. Missing keys fall back to the defaults visible below.

        Args:
            pdf: Document to draw on; expected to have a current page.

        Returns:
            The same ``pdf`` instance, for chaining.
        """
        data = self.data
        # ``or {}`` also covers a key that is present but explicitly None.
        personal = data.get("personal_details") or {}
        if hasattr(personal, 'model_dump'):
            personal = personal.model_dump()  # Handle Pydantic objects

        # Use the integer day: ``%d`` zero-pads and would yield "05th".
        today = datetime.date.today()
        suffix = self._get_day_suffix(today.day)
        date_str = f"{today.day}{suffix} {today.strftime('%B %Y')}"
        city = data.get("city", "Jakarta")
        country = data.get("country", "[Country]")

        # Header
        pdf.set_font("Times", "", 12)
        pdf.multi_cell(0, 8, f"{city}, {date_str}")
        pdf.multi_cell(0, 8, f"To: Embassy of {country} Jakarta")
        pdf.multi_cell(0, 8, "Subject: Application for Schengen Visa")
        pdf.ln(8)

        body = (f"Dear Sir/Madam,\n\nI am writing to apply for a Schengen {data.get('purpose', 'visa')} "
                f"to visit {data.get('main_dest', 'Europe')} on {data.get('event', 'vacation')}. "
                f"Travel dates: {data.get('start', '')} to {data.get('end', '')} ({data.get('duration', '')}).")
        pdf.multi_cell(0, 8, body)
        pdf.ln(8)

        # Personal details: bold heading, then one "label : value" row each.
        pdf.set_font("Times", "B", 12)
        pdf.cell(0, 10, "Personal Details:", ln=True)
        pdf.set_font("Times", "", 12)
        fields = [
            ("Name", personal.get("name", "")),
            ("DOB", personal.get("dob", "")),
            ("Nationality", personal.get("nationality", "")),
            ("Passport", personal.get("passport_number", "")),
            ("Occupation", personal.get("occupation", "")),
        ]
        for label, val in fields:
            pdf.cell(45, 8, label, align="L")
            pdf.multi_cell(0, 8, f": {val}")

        # Closing
        pdf.ln(8)
        pdf.multi_cell(0, 8, f"Commitment: {data.get('job_commitment', '')}")
        pdf.ln(4)
        pdf.multi_cell(0, 8, f"Financial Status: {data.get('financial_status', 'sound')}")
        pdf.ln(8)
        pdf.multi_cell(0, 8, "Thank you,\n\n")
        # The signature line must be a string even if the name is None.
        pdf.multi_cell(0, 8, str(personal.get("name") or ""))
        return pdf