Spaces:
Sleeping
Sleeping
| import os | |
| from pdf2image import convert_from_path | |
| from datetime import date | |
| from PIL import Image | |
| import gradio as gr | |
| from google import genai | |
| import zipfile | |
| import tempfile | |
# pdf2image needs the poppler command-line tools (pdftoppm) at runtime.
# Install them only when they are missing, and pass -y so apt-get does not
# stop at its confirmation prompt (there is no terminal to answer it here).
import shutil
if shutil.which("pdftoppm") is None:
    os.system("apt-get install -y poppler-utils")
| import datetime | |
| from docx import Document | |
| import time | |
| import random | |
| from google.genai.types import GenerateContentConfig | |
| import json | |
def extract_zip_and_collect_files(zip_file_path):
    """Extract a ZIP archive and list the PDF/image files found inside it.

    The archive is unpacked into a fresh temporary directory, which is then
    walked recursively. The directory is intentionally left on disk because
    the caller still has to open the returned files.

    Args:
        zip_file_path: Path of the ZIP archive to extract.

    Returns:
        Paths of every extracted file whose name ends with .pdf, .jpg, .jpeg
        or .png (case-insensitive), in a deterministic order: directories are
        visited alphabetically and files are listed alphabetically within
        each directory.
    """
    supported_suffixes = ('.pdf', '.jpg', '.jpeg', '.png')

    temp_dir = tempfile.mkdtemp()
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(temp_dir)

    collected_files = []
    for root, dirs, files in os.walk(temp_dir):
        # Archives created on macOS carry a "__MACOSX" folder and "._name"
        # AppleDouble stubs. They reuse the real extensions but are not
        # valid PDFs/images, so they must never reach the converters.
        # Assigning to dirs[:] prunes the walk and fixes the visiting order.
        dirs[:] = sorted(d for d in dirs if d != '__MACOSX')
        for f in sorted(files):
            if f.startswith('._'):
                continue
            if f.lower().endswith(supported_suffixes):
                collected_files.append(os.path.join(root, f))
    return collected_files
def process_pdfs(pdf_files, dpi):
    """Render every page of the given PDF files as images.

    Files are handled one at a time in the order given, so pages of earlier
    files are rendered before a later path is checked.

    Args:
        pdf_files: Iterable of PDF file paths.
        dpi: Rendering resolution forwarded to convert_from_path.

    Returns:
        A single list holding the page images of all files, in input order.

    Raises:
        ValueError: If one of the paths is not an existing file.
    """
    pages = []
    for path in pdf_files:
        if os.path.isfile(path):
            pages += convert_from_path(path, dpi=dpi)
            continue
        raise ValueError(f"File {path} does not exist.")
    return pages
def _parse_analysis_response(raw_output):
    """Split the model's JSON reply into the pieces the caller returns.

    Args:
        raw_output: Text returned by the model (expected to be a JSON object).

    Returns:
        Tuple (analysis_str, summary, invalid_list_str, notice_msg,
        form_filling_str). When the reply is not a JSON object, the neutral
        defaults ("{}", "", "[]", "", "{}") are returned instead of raising.
    """
    defaults = ("{}", "", "[]", "", "{}")

    try:
        parsed_output = json.loads(raw_output)
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error parsing JSON: {e}")
        return defaults
    if not isinstance(parsed_output, dict):
        print(f"Error parsing JSON: expected an object, got {type(parsed_output).__name__}")
        return defaults

    # "or <default>" also covers keys that are present but null.
    analysis = parsed_output.get("analysis") or {}
    summary = parsed_output.get("summary") or ""
    invalid_list = parsed_output.get("invalid_item") or []
    form_filling = parsed_output.get("form_filling") or {}
    notice_body = parsed_output.get("notice_msg") or ""
    if not isinstance(notice_body, str):
        # The greeting below is concatenated with this value, so it must be text.
        notice_body = str(notice_body)

    analysis_str = json.dumps(analysis, indent=2, ensure_ascii=False)
    invalid_list_str = json.dumps(invalid_list, indent=2, ensure_ascii=False)
    form_filling_str = json.dumps(form_filling, indent=2, ensure_ascii=False)
    notice_msg = '''π» *(WISH TRAVELERS) UPDATE DOKUMEN PENGAJUAN VISA* π»
Penting, mohon di baca sampai habis ya kak ππ»
Ini adalah nomor sistem, tidak dapat membalas pesan. Apabila ada pertanyaan, silahkan hubungi mimin WT wa.me/6282123038484
Halo Kak selamat malam π
''' + notice_body
    return analysis_str, summary, invalid_list_str, notice_msg, form_filling_str


def gemini_analysis(images, tanggal_berangkat, tanggal_pulang, tanggal_biometrik):
    """Ask Gemini to validate the visa documents shown in the given images.

    Args:
        images: List of page images sent to the model together with the prompt.
        tanggal_berangkat: Departure date, inserted into the prompt as given.
        tanggal_pulang: Return date, inserted into the prompt as given.
        tanggal_biometrik: Biometric appointment date, inserted as given.

    Returns:
        Tuple (raw_output, analysis_str, summary, invalid_list_str,
        notice_msg, form_filling_str, token_usage). The *_str values are
        pretty-printed JSON. token_usage is total tokens * 5 / 1000, which the
        UI labels "Token cost in IDR" (presumably 5 IDR per 1000 tokens -
        TODO confirm the rate). If the reply cannot be parsed, the parsed
        fields fall back to empty defaults and raw_output still carries the
        model text.
    """
    # SECURITY: the fallback key below is committed in source and must be
    # treated as compromised. Rotate it, provide the new one through the
    # GEMINI_API_KEY environment variable, and then delete the fallback.
    api_key = os.environ.get("GEMINI_API_KEY") or 'AIzaSyBpviFHkaEF-GAjMMl28dIS1poikhBqq_w'
    client = genai.Client(api_key=api_key)
    # Validation instructions; this text is sent to the model verbatim.
    prompt = '''Anda bertugas memvalidasi kelengkapan, kesesuaian dan konsistensi data dari dokumen individual berdasarkan syarat Visa di bawah. Cukup fokus pada syarat yang diberikan tanpa mengambil referensi lain.
Pastikan data seperti nama, tanggal, tujuan dan lainnya konsisten antar dokumen.
---
DAFTAR SYARAT DOKUMEN YANG WAJIB DIPERIKSA:
1. Paspor: WAJIB ADA, aktif min. 6 bulan setelah Tanggal Pulang Perjalanan, Ada tanda tangan, Lampirkan paspor lama jika ada, Copy paspor sponsor jika disponsori.
2. Fotokopi Paspor: Tidak wajib ada, Halaman depan & tanda tangan, Semua visa perjalanan sebelumnya jika ada (terutama 5 tahun terakhir).
3. Pas Foto: Wajib ada, background putih, Wajah terlihat 80%, alis terlihat, tidak berbayang.
4. Kartu Keluarga (KK): Wajib ada, Minimal versi 2019 atau memiliki barcode, Sesuai status, Harus ditranslate untuk VFS Germany.
5. Akte Nikah/Surat Nikah/Bukti nikah/Surat Cerai/ Bukti Cerai: Wajib ada jika sudah menikah/bercerai, Tidak perlu jika belum, Halaman biodata suami/istri, Jika istri ikut, wajib ada Surat Izin Suami.
6. KTP: Wajib ada, Nama harus sama dengan paspor, KK dan dokumen-dokumen lain. Jika nama berbeda wajib Surat Beda Nama.
7. KTP Sponsor: Wajib ada jika disponsori.
8. Akta Kelahiran / Surat Kelahiran / Bukti Kelahiran / Ijazah: Wajib jika ada anak-anak atau disponsori orang lain.
9. Surat Sponsor (Guarantee Letter): Wajib ada. Bisa dari universitas, dinas/perusahaan, orang tua, atau orang lain (misal. suami). Wajib berbahasa Inggris, tertera Tujuan negara, tanggal trip, Siapa penanggung biaya, Wajib ada tulisan yang menjaminan kembali ke Indonesia.
10. Status Pekerjaan (WAJIB ADA salah satu): Pegawai (Surat kerja + Slip gaji 3 bulan), Pemilik usaha (NIB/SIUP + Surat jaminan staf), Pelajar (Surat sekolah/universitas + kartu pelajar), Freelancer/Onlineshop (Kontrak kerja & 5 bukti transaksi), Pensiun (Surat pensiun + guarantee keluarga).
11. Rekening Koran 3 bulan terakhir hingga tanggal biometrik: WAJIB ADA, Atas nama pribadi, tertera Cap & logo bank, nama, nomor rekening, Saldo stabil min. Rp 35 juta/orang dari 3 bulan hingga 1 minggu sebelum tanggal biometrik.
12. Rekening Koran Sponsor 3 bulan terakhir hingga tanggal biometrik: WAJIB ADA jika disponsori, Atas & sponsor, tertera Cap & logo bank, nama, nomor rekening, Saldo stabil min. Rp 35 juta/orang dari 3 bulan hingga 1 minggu sebelum tanggal biometrik.
13. Slip Gaji: Wajib ada jika pekerja, 3 bulan terakhir sebelum tanggal berankgat, Jika suami lengkap, istri cukup lampirkan rekening koran suami.
14. Surat Referensi Bank: Wajib ada. Surat yang menyatakan bahwa peserta tersebut adalah nasabah bank terkait. Berbeda dengan rekening koran.
15. Surat Referensi Bank Sponsor: Wajib ada jika disponsori. Berupa surat yang menyatakan bahwa pemberi sponsor tersebut adalah nasabah bank terkait. Berbeda dengan rekening koran.
---
TEMPLATE OUTPUT JAWABAN DALAM FORMAT JSON:
{
"analysis":
{
"nama_dokumen_1": {"status": "VALID / INVALID / Tidak Perlu",
"description": "penjelasan detail"},
"nama_dokumen_2": {"status": "VALID / INVALID / Tidak Perlu",
"description": "penjelasan detail"},
"nama_dokumen_3": {"status": "VALID / INVALID / Tidak Perlu",
"description": "penjelasan detail"}
},
"summary": "...", # Analisa keseluruhan dokumen yang diperiksa
"invalid_item": ["nama_dokumen_1", "nama_dokumen_2"],
"notice_msg": "...",
"form_filling": {"Surname":"...",
"First Name":"...",
"Date of Birth":"...",
"Place of Birth":"...",
"Nationality":"...",
"Sex":"...",
"Mariage Status":"...",
"Passport Number":"...",
"Passport Expiry Date":"...",
"National Identity Number":"...",
"Travel Document Type":"...",
"Travel Document Number":"...",
"Date Of Issue":"...",
"Valid Until":"...",
"Issued Country":"...",
"Applicant's Home Address":"...",
"Applicant's Telephone Number":"...",
"Applicant's Email Address":"...",
"Current Occupation":"...",
"Employer/Educational Address":"...",
"Journey Purpose":"...",
"Destination":"...",
"Duration":"...",
"Number of Entries":"...", # single or multiple
"already has fingerprint":"...",
"inviting person from each destination":"...",
"inviting person email address":"...",
"Traveling and living cost covered by":"..."
} # Hanya isi yang ada di dokumen saja. Jika tidak ada, tidak usah diisi atau dimunculkan
}
---
TEMPLATE PESAN PEMBERITAHUAN (notice_msg):
Berikut kami informasikan kekurangan dokumen yang *WAJIB* dibawa saat biometric visa schengen nanti ya :
1. Pas Foto (3,5 x 4,5 cm)
2. Paspor Asli
3. Fotokopi paspor
4. ...
5. ...
# dan seterusnya dokumen dokumen lain yang belum lengkap dan perlu revisi berdasarkan hasil analisa
---
TEMPLATE SUMMARY:
List Dokumen yang sudah valid: ....
List Dokumen yang belum ada / perlu diperbaiki / perlu cek ulang : ...
Analisis detail keseluruhan : ...
---
'''
    # The trip dates go first so the model can apply the date-based rules.
    prompt_with_date = f'Tanggal Berangkat={tanggal_berangkat}. Tanggal pulang={tanggal_pulang}. Tanggal Biometrik={tanggal_biometrik}\n\n{prompt}'
    response = client.models.generate_content(
        model="gemini-2.0-flash-lite",
        contents=[prompt_with_date, *images],
        config=GenerateContentConfig(
            temperature=0.2,
            top_p=0.2,
            response_mime_type="application/json"
        )
    )

    # Usage metadata or the token count may be absent; count that as zero.
    usage = response.usage_metadata
    pre_token_usage = (usage.total_token_count if usage is not None else 0) or 0
    token_usage = pre_token_usage*5/1000

    # response.text can be None (e.g. no text part in the reply).
    raw_output = response.text or ""

    # Parsing never raises: on a malformed reply every field gets a default,
    # so the return statement below always has all of its values.
    analysis_str, summary, invalid_list_str, notice_msg, form_filling_str = _parse_analysis_response(raw_output)
    return raw_output, analysis_str, summary, invalid_list_str, notice_msg, form_filling_str, token_usage
def _safe_document_name(raw_name):
    """Turn the model's free-text answer into a safe file stem.

    Keeps only the first line, lower-cases it, drops a trailing extension and
    removes every character that is not alphanumeric, "_" or "-", so the
    result can never contain path separators. Falls back to "unknown" when
    nothing usable is left (including a None/empty answer).
    """
    lines = (raw_name or "").strip().lower().splitlines()
    stem = os.path.splitext(lines[0].strip())[0] if lines else ""
    cleaned = "".join(ch for ch in stem if ch.isalnum() or ch in "_-")
    return cleaned or "unknown"


def process_and_zip_all_images(images, zip_name="All_PDF_Docs.zip"):
    """Classify each image with Gemini, merge same-type pages, and zip them.

    Every image is sent to the model, which is asked to answer with a
    document-type name. Images that receive the same name are combined into
    one multi-page PDF named "<name>.pdf", and all PDFs are written to a ZIP
    archive in the system temporary directory.

    Args:
        images: List of PIL images to classify. An empty list produces an
            empty archive without contacting the model.
        zip_name: File name of the archive to create.

    Returns:
        Path of the created ZIP archive.
    """
    # Classification prompt; this text is sent to the model verbatim.
    prompt_3 = '''Anda adalah asisten yang membantu menamai file gambar dokumen.
Tugas Anda adalah mengidentifikasi jenis dokumen pada gambar ini dan memberikan nama file yang sesuai.
Jawaban Anda *harus* berupa *salah satu* nama file dari daftar berikut:
['paspor', 'pasfoto', 'kartukeluarga', 'buktinikah', 'KTP', 'suratkelahiran', 'suratsponsor', 'suratkerja', 'NIB', 'SIUP', 'suratjaminanstaff', 'suratsekolah', 'kontrakkerja', 'suratpensiun', 'rekeningkoran', 'slipgaji']
Jawaban Anda *hanya* boleh berupa teks yang *persis sama* dengan salah satu item dalam daftar tersebut.
Jangan tambahkan penjelasan, tanda kutip, titik, atau teks tambahan lainnya.
Contoh:
Gambar : [tampak gambar KTP]
Output: KTP
Gambar: [gambar sebenarnya]
Output:
'''

    # Step 1: classify every image and group the images by document name.
    grouped = {}
    if images:
        # SECURITY: the fallback key below is committed in source and must be
        # treated as compromised. Rotate it, provide the new one through the
        # GEMINI_API_KEY environment variable, and then delete the fallback.
        api_key = os.environ.get("GEMINI_API_KEY") or 'AIzaSyBpviFHkaEF-GAjMMl28dIS1poikhBqq_w'
        client = genai.Client(api_key=api_key)
        for image in images:
            response = client.models.generate_content(
                model="gemini-2.0-flash-lite",
                contents=[prompt_3, image],
                config=GenerateContentConfig(
                    temperature=0.1,
                    top_p=0.1
                )
            )
            # The answer is model output, so sanitize it before it becomes
            # part of a file path.
            doc_name = _safe_document_name(response.text)
            grouped.setdefault(doc_name, []).append(image)

    # Step 2: write one PDF per group and add it to the ZIP archive. The PDFs
    # live in a scratch directory that is removed once the archive is closed.
    zip_path = os.path.join(tempfile.gettempdir(), zip_name)
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
            for doc_name, doc_images in grouped.items():
                pages = [img.convert("RGB") for img in doc_images]
                pdf_path = os.path.join(temp_dir, f"{doc_name}.pdf")
                # An empty append_images list yields a single-page PDF.
                pages[0].save(pdf_path, save_all=True, append_images=pages[1:])
                zipf.write(pdf_path, arcname=f"{doc_name}.pdf")
    return zip_path
def _load_images_from_path(path, dpi):
    """Load one PDF or image file as a list of images.

    Returns None when the extension is not .pdf, .jpg, .jpeg or .png so the
    caller can decide how to treat unsupported files.
    """
    lowered = path.lower()
    if lowered.endswith('.pdf'):
        return process_pdfs([path], dpi)
    if lowered.endswith(('.jpg', '.jpeg', '.png')):
        image = Image.open(path)
        # Image.open is lazy; load the pixel data now so Pillow can release
        # the underlying file handle instead of keeping it open.
        image.load()
        return [image]
    return None


def main_process(files, tanggal_berangkat, tanggal_pulang, tanggal_biometrik, dpi):
    """Run the whole check: load the uploads, analyze them, build the outputs.

    Args:
        files: Uploaded files; each item is either a path string or an object
            with a .name attribute holding the path. PDF, JPG/JPEG, PNG and
            ZIP (containing those types) are accepted.
        tanggal_berangkat: Departure date, passed on to the analysis.
        tanggal_pulang: Return date, passed on to the analysis.
        tanggal_biometrik: Biometric appointment date, passed on as well.
        dpi: Resolution used when rendering PDF pages.

    Returns:
        Tuple (docx_path, form_filling_str, zip_file_path, invalid_list_str,
        raw_output, summary, notice_msg, token_usage), in the order the UI
        outputs are wired.

    Raises:
        ValueError: If nothing was uploaded, an upload has an unsupported
            extension, or the uploads contain no PDF page or image at all.
    """
    if not files:
        raise ValueError("No files were uploaded. Please upload at least one PDF, image, or ZIP file.")

    all_images = []
    for file in files:
        file_path = file.name if hasattr(file, 'name') else file
        if file_path.lower().endswith('.zip'):
            # Entries inside the archive are already filtered by extension.
            for extracted_file in extract_zip_and_collect_files(file_path):
                all_images.extend(_load_images_from_path(extracted_file, dpi) or [])
            continue
        images = _load_images_from_path(file_path, dpi)
        if images is None:
            raise ValueError(f"File {file_path} is not a valid image, PDF, or ZIP.")
        all_images.extend(images)

    if not all_images:
        # Avoid a paid model call that would have nothing to analyze.
        raise ValueError("No PDF pages or images were found in the uploaded files.")

    # Generate summary from images
    raw_output, analysis_str, summary, invalid_list_str, notice_msg, form_filling_str, token_usage = gemini_analysis(all_images, tanggal_berangkat, tanggal_pulang, tanggal_biometrik)

    # Random 5-10 second pause before the classification requests that follow
    # (presumably to stay under an API rate limit - TODO confirm).
    rdf = random.randint(5, 10)
    time.sleep(rdf)

    # Create DOCX for summary output
    doc = Document()
    doc.add_heading("Visa Document Check Summary", level=1)
    doc.add_paragraph(f"Tanggal Berangkat: {tanggal_berangkat}")
    doc.add_paragraph(f"Tanggal Pulang: {tanggal_pulang}")
    for line in analysis_str.split("\n"):
        doc.add_paragraph(line)
    doc.add_paragraph(f"Summary: {summary}\n\n")
    doc.add_paragraph(f"Invalid List: {invalid_list_str}\n\n")
    doc.add_paragraph(f"Notice Message: {notice_msg}\n\n")
    doc.add_paragraph(f"Form Filling: {form_filling_str}\n\n")

    # Output files are named after the first uploaded file.
    first_file = files[0]
    first_filename = os.path.basename(first_file.name if hasattr(first_file, 'name') else first_file)
    base_name = os.path.splitext(first_filename)[0]
    docx_filename = f"summary_{base_name}.docx"
    temp_docx_path = os.path.join(tempfile.gettempdir(), docx_filename)
    doc.save(temp_docx_path)

    # Classify the pages and bundle them as per-document PDFs in a ZIP.
    zip_file_path = process_and_zip_all_images(all_images, zip_name=f'All_PDF_Docs_{base_name}.zip')
    return temp_docx_path, form_filling_str, zip_file_path, invalid_list_str, raw_output, summary, notice_msg, token_usage
# Gradio UI: upload widget (PDF, image and ZIP), trip dates, DPI slider,
# and the panels that show the results of main_process.
# NOTE(review): the original indentation was lost, so the nesting below is a
# reconstruction - confirm which widgets belong inside each gr.Row (in
# particular whether the DPI slider sits inside the first row).
with gr.Blocks() as demo:
    gr.Markdown("# π§ Noura the Document Checker βοΈ ")
    gr.Markdown("Last Updated: May 30 2025, 11.13 AM")
    # Inputs
    file_input = gr.File(
        label="Upload PDFs, Images or ZIP files (Multiple Supported)",
        file_types=[".pdf", ".jpg", ".jpeg", ".png", ".zip"],
        file_count="multiple"
    )
    # The three dates are free-text boxes; their values are passed on as typed.
    with gr.Row():
        tanggal_berangkat = gr.Textbox(
            label="Tanggal Keberangkatan",
            placeholder="Masukan Tanggal Keberangkatan",
            type="text"
        )
        tanggal_pulang = gr.Textbox(
            label="Tanggal Kepulangan",
            placeholder="Masukan Tanggal Kepulangan",
            type="text"
        )
        tanggal_biometrik = gr.Textbox(
            label="Tanggal Biometrik",
            placeholder="Masukan Tanggal Biometrik",
            type="text"
        )
    # Resolution passed to main_process as its dpi argument.
    dpi_slider = gr.Slider(
        minimum=100,
        maximum=400,
        step=25,
        label="Adjust DPI (100 - 400, Ξ=25, default=300)",
        value=300  # initial slider position
    )
    run_btn = gr.Button("π Run Analysis")
    # Outputs: downloadable files first, then the text panels.
    with gr.Row():
        download_output_docx = gr.File(label="π₯ Download Summary as DOCX", visible=True)
        download_valid_zip = gr.File(label="π₯ Download all PDF document in zip", visible=True)
    gr.Markdown("## π FORM FILLING RESULT")
    form_filling_output = gr.Textbox(label="π FORM FILLING RESULT", lines=20)
    gr.Markdown("## π INVALID DOCUMENT LIST")
    invalid_list_output = gr.Textbox(label="π INVALID DOCUMENT LIST", lines=5)
    gr.Markdown("## π SUMMARY")
    summary_output = gr.Textbox(label="π SUMMARY OUTPUT", lines=5)
    gr.Markdown("## π NOTIFICATION MESSAGE")
    notice_msg = gr.Textbox(label="π NOTIFICATION MSG", lines=10)
    gr.Markdown("## π RAW OUTPUT FROM AI")
    raw_output = gr.Textbox(label="π RAW OUTPUT", lines=20)
    gr.Markdown("Token cost in IDR")
    token_usage = gr.Textbox(label="Token cost in IDR", lines=5)
    # The order of `outputs` must match the order of the tuple returned by
    # main_process (docx, form filling, zip, invalid list, raw, summary,
    # notice, token cost).
    run_btn.click(
        fn=main_process,
        inputs=[file_input, tanggal_berangkat, tanggal_pulang, tanggal_biometrik, dpi_slider],
        outputs=[download_output_docx, form_filling_output, download_valid_zip, invalid_list_output, raw_output, summary_output, notice_msg, token_usage]
    )
# Start the app when this file is executed.
demo.launch(debug=True)