Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ from docx import Document
|
|
| 12 |
import time
|
| 13 |
import random
|
| 14 |
from google.genai.types import GenerateContentConfig
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def extract_zip_and_collect_files(zip_file_path):
|
|
@@ -41,7 +42,7 @@ def process_pdfs(pdf_files):
|
|
| 41 |
if not os.path.isfile(pdf_file):
|
| 42 |
raise ValueError(f"File {pdf_file} does not exist.")
|
| 43 |
|
| 44 |
-
images = convert_from_path(pdf_file, dpi=300)
|
| 45 |
all_images.extend(images)
|
| 46 |
|
| 47 |
return all_images
|
|
@@ -49,21 +50,14 @@ def process_pdfs(pdf_files):
|
|
| 49 |
|
| 50 |
# Function to analyze the extracted image using Google GenAI
|
| 51 |
def gemini_analysis(images, tanggal_berangkat, tanggal_pulang, api_key):
|
| 52 |
-
"""
|
| 53 |
-
Analyze the extracted image using Google GenAI.
|
| 54 |
-
"""
|
| 55 |
-
# Initialize the GenAI client (make sure the API key is set properly)
|
| 56 |
-
today = date.today()
|
| 57 |
client = genai.Client(api_key=api_key)
|
| 58 |
# Define your prompt
|
| 59 |
-
prompt =
|
| 60 |
-
|
| 61 |
-
Pastikan data seperti nama, tanggal, tujuan dan lainnya konsisten antar dokumen. Semua jawaban dalam format text. Jangan tambahkan formating seperti * atau #
|
| 62 |
-
Jawaban cukup berupa rangkuman saja, tidak perlu per dokumen 1-per-1
|
| 63 |
-
Terakhir, buat pesan pemberitahuan berdasarkan hasil analisa dokumen-dokumen dengan format dari contoh di bawah.
|
| 64 |
---
|
| 65 |
DAFTAR SYARAT DOKUMEN YANG WAJIB DIPERIKSA:
|
| 66 |
1. Paspor
|
|
|
|
| 67 |
Asli, aktif min. 6 bulan setelah tanggal kepulangan
|
| 68 |
Ada tanda tangan pemilik
|
| 69 |
Lampirkan paspor lama jika ada visa perjalanan sebelumnya
|
|
@@ -71,95 +65,136 @@ def gemini_analysis(images, tanggal_berangkat, tanggal_pulang, api_key):
|
|
| 71 |
2. Fotokopi Paspor
|
| 72 |
Halaman depan & tanda tangan
|
| 73 |
Semua visa perjalanan sebelumnya jika ada (terutama 5 tahun terakhir)
|
| 74 |
-
Jika tidak ada, cukup beritahu bahwa Fotokopi Paspor belum ada.
|
| 75 |
3. Pas Foto
|
|
|
|
| 76 |
Ukuran kurang lebih 3.5 x 4.5 cm, background putih
|
| 77 |
Wajah terlihat 80%, alis tidak tertutup, tidak pakai softlens, tidak berbayang
|
| 78 |
4. Kartu Keluarga (KK)
|
|
|
|
| 79 |
Minimal versi 2019 atau berbarcode
|
| 80 |
Harus sesuai status (nikah/cerai/anak-anak β lampirkan dokumen pendukung)
|
| 81 |
Harus ditranslate untuk VFS Germany
|
| 82 |
-
5. Akte Nikah/
|
|
|
|
| 83 |
Halaman biodata suami dan istri saja
|
| 84 |
Jika istri ikut, wajib melampirkan surat izin suami
|
| 85 |
6. KTP
|
|
|
|
| 86 |
Nama harus sama dengan paspor dan tercantum pada Kartu Keluarga (KK)
|
| 87 |
Jika nama berbeda maka wajib lampirkan surat beda nama
|
| 88 |
7. Akta Kelahiran / Surat Kelahiran / Bukti Kelahiran / Ijazah
|
| 89 |
-
Wajib jika anak-anak atau peserta disponsori oranglain
|
| 90 |
8. Surat Sponsor (Guarantee Letter)
|
|
|
|
| 91 |
Dalam Bahasa Inggris
|
| 92 |
Tujuan negara, tanggal trip
|
| 93 |
Siapa yang menanggung biaya
|
| 94 |
Wajib ada tertulis menjamin akan kembali ke Indonesia
|
| 95 |
9. Status Pekerjaan
|
|
|
|
|
|
|
| 96 |
Pegawai: Surat kerja + Slip gaji 3 bulan
|
| 97 |
Pemilik usaha: NIB/SIUP + Surat jaminan staf
|
| 98 |
Pelajar: Surat sekolah/universitas + kartu pelajar
|
| 99 |
Freelancer/onlineshop: Kontrak kerja & 5 bukti transaksi
|
| 100 |
Pensiun: Surat pensiun + guarantee dari keluarga
|
| 101 |
-
10. Rekening Koran
|
|
|
|
| 102 |
Atas nama pribadi & sponsor
|
| 103 |
Cap & logo bank, nama, nomor rekening
|
| 104 |
Saldo stabil min. Rp 35 juta/orang
|
| 105 |
11. Slip Gaji
|
|
|
|
| 106 |
3 bulan terakhir
|
| 107 |
Jika suami lengkap β istri cukup lampirkan rekening koran suami
|
| 108 |
---
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
| 112 |
---
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
'''
|
| 126 |
|
| 127 |
# Perform document analysis
|
| 128 |
-
|
|
|
|
| 129 |
model="gemini-2.0-flash-lite",
|
| 130 |
-
contents=[
|
| 131 |
config=GenerateContentConfig(
|
| 132 |
-
temperature=0.
|
| 133 |
-
top_p=0.
|
| 134 |
-
|
| 135 |
)
|
| 136 |
)
|
| 137 |
-
|
| 138 |
-
#
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
return analysis, docs_list
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
def process_and_zip_all_images(images, api_key, zip_name="Valid_Docs.zip"):
|
| 163 |
# Inisialisasi Gemini client
|
| 164 |
client = genai.Client(api_key=api_key)
|
| 165 |
|
|
@@ -205,14 +240,11 @@ Output:
|
|
| 205 |
for doc_name, images in grouped.items():
|
| 206 |
images_rgb = [img.convert("RGB") for img in images]
|
| 207 |
pdf_path = os.path.join(temp_dir, f"{doc_name}.pdf")
|
| 208 |
-
|
| 209 |
if len(images_rgb) == 1:
|
| 210 |
images_rgb[0].save(pdf_path, save_all=True)
|
| 211 |
else:
|
| 212 |
images_rgb[0].save(pdf_path, save_all=True, append_images=images_rgb[1:])
|
| 213 |
-
|
| 214 |
zipf.write(pdf_path, arcname=f"{doc_name}.pdf")
|
| 215 |
-
|
| 216 |
return zip_path
|
| 217 |
|
| 218 |
|
|
@@ -242,7 +274,7 @@ def main_process(files, tanggal_berangkat, tanggal_pulang, api_key):
|
|
| 242 |
raise ValueError(f"File {file_path} is not a valid image, PDF, or ZIP.")
|
| 243 |
|
| 244 |
# Generate summary from images
|
| 245 |
-
summary,
|
| 246 |
rdf = random.randint(5, 10)
|
| 247 |
time.sleep(rdf)
|
| 248 |
|
|
@@ -259,8 +291,8 @@ def main_process(files, tanggal_berangkat, tanggal_pulang, api_key):
|
|
| 259 |
doc.save(temp_docx_path)
|
| 260 |
|
| 261 |
# Filtering the file
|
| 262 |
-
zip_file_path = process_and_zip_all_images(all_images, api_key, zip_name=f'
|
| 263 |
-
return temp_docx_path,
|
| 264 |
|
| 265 |
|
| 266 |
# Gradio UI update: add ".zip" to accepted file types
|
|
@@ -291,16 +323,18 @@ with gr.Blocks() as demo:
|
|
| 291 |
)
|
| 292 |
|
| 293 |
run_btn = gr.Button("π Run Analysis")
|
| 294 |
-
output = gr.Textbox(label="π INVALID DOCUMENT LIST", lines=5)
|
| 295 |
|
| 296 |
with gr.Row():
|
| 297 |
download_output_docx = gr.File(label="π₯ Download Summary as DOCX", visible=True)
|
| 298 |
download_valid_zip = gr.File(label="π₯ Download all PDF document in zip", visible=True)
|
|
|
|
|
|
|
|
|
|
| 299 |
|
| 300 |
run_btn.click(
|
| 301 |
fn=main_process,
|
| 302 |
inputs=[file_input, tanggal_berangkat, tanggal_pulang, api_key],
|
| 303 |
-
outputs=[download_output_docx,
|
| 304 |
)
|
| 305 |
|
| 306 |
demo.launch(debug=True)
|
|
|
|
| 12 |
import time
|
| 13 |
import random
|
| 14 |
from google.genai.types import GenerateContentConfig
|
| 15 |
+
import json
|
| 16 |
|
| 17 |
|
| 18 |
def extract_zip_and_collect_files(zip_file_path):
|
|
|
|
| 42 |
if not os.path.isfile(pdf_file):
|
| 43 |
raise ValueError(f"File {pdf_file} does not exist.")
|
| 44 |
|
| 45 |
+
images = convert_from_path(pdf_file, dpi=300)
|
| 46 |
all_images.extend(images)
|
| 47 |
|
| 48 |
return all_images
|
|
|
|
| 50 |
|
| 51 |
# Function to analyze the extracted image using Google GenAI
|
| 52 |
def gemini_analysis(images, tanggal_berangkat, tanggal_pulang, api_key):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
client = genai.Client(api_key=api_key)
|
| 54 |
# Define your prompt
|
| 55 |
+
prompt = '''Anda bertugas memvalidasi kelengkapan, kesesuaian dan konsistensi data dari dokumen individual berdasarkan syarat Visa di bawah. Cukup fokus pada syarat yang diberikan tanpa mengambil referensi lain.
|
| 56 |
+
Pastikan data seperti nama, tanggal, tujuan dan lainnya konsisten antar dokumen.
|
|
|
|
|
|
|
|
|
|
| 57 |
---
|
| 58 |
DAFTAR SYARAT DOKUMEN YANG WAJIB DIPERIKSA:
|
| 59 |
1. Paspor
|
| 60 |
+
WAJIB ADA
|
| 61 |
Asli, aktif min. 6 bulan setelah tanggal kepulangan
|
| 62 |
Ada tanda tangan pemilik
|
| 63 |
Lampirkan paspor lama jika ada visa perjalanan sebelumnya
|
|
|
|
| 65 |
2. Fotokopi Paspor
|
| 66 |
Halaman depan & tanda tangan
|
| 67 |
Semua visa perjalanan sebelumnya jika ada (terutama 5 tahun terakhir)
|
| 68 |
+
Tidak wajib ada. Jika tidak ada, cukup beritahu bahwa Fotokopi Paspor belum ada.
|
| 69 |
3. Pas Foto
|
| 70 |
+
Wajib ada
|
| 71 |
Ukuran kurang lebih 3.5 x 4.5 cm, background putih
|
| 72 |
Wajah terlihat 80%, alis tidak tertutup, tidak pakai softlens, tidak berbayang
|
| 73 |
4. Kartu Keluarga (KK)
|
| 74 |
+
Wajib ada
|
| 75 |
Minimal versi 2019 atau berbarcode
|
| 76 |
Harus sesuai status (nikah/cerai/anak-anak β lampirkan dokumen pendukung)
|
| 77 |
Harus ditranslate untuk VFS Germany
|
| 78 |
+
5. Akte Nikah/Surat Nikah/Bukti nikah/Surat Cerai/ Bukti Cerai
|
| 79 |
+
Wajib ada jika sudah menikah atau bercerai. Tidak perlu jika belum menikah
|
| 80 |
Halaman biodata suami dan istri saja
|
| 81 |
Jika istri ikut, wajib melampirkan surat izin suami
|
| 82 |
6. KTP
|
| 83 |
+
Wajib ada
|
| 84 |
Nama harus sama dengan paspor dan tercantum pada Kartu Keluarga (KK)
|
| 85 |
Jika nama berbeda maka wajib lampirkan surat beda nama
|
| 86 |
7. Akta Kelahiran / Surat Kelahiran / Bukti Kelahiran / Ijazah
|
| 87 |
+
Wajib jika ada anak-anak atau peserta disponsori oranglain
|
| 88 |
8. Surat Sponsor (Guarantee Letter)
|
| 89 |
+
Wajib ada
|
| 90 |
Dalam Bahasa Inggris
|
| 91 |
Tujuan negara, tanggal trip
|
| 92 |
Siapa yang menanggung biaya
|
| 93 |
Wajib ada tertulis menjamin akan kembali ke Indonesia
|
| 94 |
9. Status Pekerjaan
|
| 95 |
+
(Surat Kerja/Slip Gaji/NIB/SIUP/Surat Jaminan Staf/Surat Sekolah/Surat Universitas/Kartu Pelajar/Kontrak Kerja/Bukti Transaksi/Surat Pensiun/Guarantee Keluarga)
|
| 96 |
+
Wajib ada salah satu
|
| 97 |
Pegawai: Surat kerja + Slip gaji 3 bulan
|
| 98 |
Pemilik usaha: NIB/SIUP + Surat jaminan staf
|
| 99 |
Pelajar: Surat sekolah/universitas + kartu pelajar
|
| 100 |
Freelancer/onlineshop: Kontrak kerja & 5 bukti transaksi
|
| 101 |
Pensiun: Surat pensiun + guarantee dari keluarga
|
| 102 |
+
10. Rekening Koran 3 bulan
|
| 103 |
+
WAJIB ADA
|
| 104 |
Atas nama pribadi & sponsor
|
| 105 |
Cap & logo bank, nama, nomor rekening
|
| 106 |
Saldo stabil min. Rp 35 juta/orang
|
| 107 |
11. Slip Gaji
|
| 108 |
+
Wajib ada jika pekerja
|
| 109 |
3 bulan terakhir
|
| 110 |
Jika suami lengkap β istri cukup lampirkan rekening koran suami
|
| 111 |
---
|
| 112 |
+
TEMPLATE PESAN PEMBERITAHUAN (notice_msg):
|
| 113 |
+
Berikut kami informasikan kekurangan dokumen yang *WAJIB* dibawa saat biometric visa schengen nanti ya :
|
| 114 |
+
|
| 115 |
+
1. ...
|
| 116 |
+
2. ...
|
| 117 |
+
3. ...
|
| 118 |
---
|
| 119 |
+
TEMPLATE OUTPUT JAWABAN DALAM FORMAT JSON:
|
| 120 |
+
{
|
| 121 |
+
"analysis":
|
| 122 |
+
{
|
| 123 |
+
"nama_dokumen_1": {"status": "VALID / INVALID / Tidak Perlu",
|
| 124 |
+
"description": "penjelasan detail"},
|
| 125 |
+
"nama_dokumen_2": {"status": "VALID / INVALID / Tidak Perlu",
|
| 126 |
+
"description": "penjelasan detail"},
|
| 127 |
+
"nama_dokumen_3": {"status": "VALID / INVALID / Tidak Perlu",
|
| 128 |
+
"description": "penjelasan detail"}
|
| 129 |
+
},
|
| 130 |
+
"summary": "...", # Analisa keseluruhan dokumen yang diperiksa
|
| 131 |
+
"invalid_item": ["nama_dokumen_1", "nama_dokumen_2"],
|
| 132 |
+
"notice_msg": "...",
|
| 133 |
+
"form_filling": {"Surname":"...",
|
| 134 |
+
"First Name":"...",
|
| 135 |
+
"Date of Birth":"...",
|
| 136 |
+
"Place of Birth":"...",
|
| 137 |
+
"Nationality":"...",
|
| 138 |
+
"Sex":"...",
|
| 139 |
+
"Mariage Status":"...",
|
| 140 |
+
"Passport Number":"...",
|
| 141 |
+
"Passport Expiry Date":"...",
|
| 142 |
+
"National Identity Number":"...",
|
| 143 |
+
"Travel Document Type":"...",
|
| 144 |
+
"Travel Document Number":"...",
|
| 145 |
+
"Date Of Issue":"...",
|
| 146 |
+
"Valid Until":"...",
|
| 147 |
+
"Issued Country":"...",
|
| 148 |
+
"Applicant's Home Address":"...",
|
| 149 |
+
"Applicant's Telephone Number":"...",
|
| 150 |
+
"Applicant's Email Address":"...",
|
| 151 |
+
"Current Occupation":"...",
|
| 152 |
+
"Employer/Educational Address":"...",
|
| 153 |
+
"Journey Purpose":"...",
|
| 154 |
+
"Destination":"...",
|
| 155 |
+
"Duration":"...",
|
| 156 |
+
"Number of Entries":"...", # single or multiple
|
| 157 |
+
"already has fingerprint":"...",
|
| 158 |
+
"inviting person from each destination":"...",
|
| 159 |
+
"inviting person email address":"...",
|
| 160 |
+
"Traveling and living cost covered by":"..."
|
| 161 |
+
} # Hanya isi yang ada di dokumen saja. Jika tidak ada, tidak usah diisi atau dimunculkan
|
| 162 |
+
}
|
| 163 |
'''
|
| 164 |
|
| 165 |
# Perform document analysis
|
| 166 |
+
prompt_with_date = f'Tanggal Berangkat={tanggal_berangkat}. Tanggal pulang={tanggal_pulang}\n\n{prompt}'
|
| 167 |
+
response = client.models.generate_content(
|
| 168 |
model="gemini-2.0-flash-lite",
|
| 169 |
+
contents=[prompt_with_date] + images,
|
| 170 |
config=GenerateContentConfig(
|
| 171 |
+
temperature=0.2,
|
| 172 |
+
top_p=0.2,
|
| 173 |
+
response_mime_type="application/json"
|
| 174 |
)
|
| 175 |
)
|
| 176 |
+
raw_output = response.text
|
| 177 |
+
# β
Inisialisasi variabel default
|
| 178 |
+
analysis = {}
|
| 179 |
+
summary = ""
|
| 180 |
+
invalid_list = []
|
| 181 |
+
notice_msg = ""
|
| 182 |
+
form_filling = {}
|
| 183 |
+
try:
|
| 184 |
+
parsed_output = json.loads(response.text)
|
| 185 |
+
analysis = parsed_output.get("analysis", {})
|
| 186 |
+
analysis_str = json.dumps(analysis, indent=2, ensure_ascii=False)
|
| 187 |
+
summary = parsed_output.get("summary", "")
|
| 188 |
+
invalid_list = parsed_output.get("invalid_item", [])
|
| 189 |
+
invalid_list_str = json.dumps(invalid_list, indent=2, ensure_ascii=False)
|
| 190 |
+
notice_msg = parsed_output.get("notice_msg", "")
|
| 191 |
+
form_filling = parsed_output.get("form_filling", "")
|
| 192 |
+
form_filling_str = json.dumps(form_filling, indent=2, ensure_ascii=False)
|
| 193 |
+
except Exception as e:
|
| 194 |
+
print(f"Error parsing JSON: {e}")
|
| 195 |
+
return raw_output, analysis_str, summary, invalid_list_str, notice_msg, form_filling_str
|
| 196 |
+
|
| 197 |
+
def process_and_zip_all_images(images, api_key, zip_name="All_PDF_Docs.zip"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
# Inisialisasi Gemini client
|
| 199 |
client = genai.Client(api_key=api_key)
|
| 200 |
|
|
|
|
| 240 |
for doc_name, images in grouped.items():
|
| 241 |
images_rgb = [img.convert("RGB") for img in images]
|
| 242 |
pdf_path = os.path.join(temp_dir, f"{doc_name}.pdf")
|
|
|
|
| 243 |
if len(images_rgb) == 1:
|
| 244 |
images_rgb[0].save(pdf_path, save_all=True)
|
| 245 |
else:
|
| 246 |
images_rgb[0].save(pdf_path, save_all=True, append_images=images_rgb[1:])
|
|
|
|
| 247 |
zipf.write(pdf_path, arcname=f"{doc_name}.pdf")
|
|
|
|
| 248 |
return zip_path
|
| 249 |
|
| 250 |
|
|
|
|
| 274 |
raise ValueError(f"File {file_path} is not a valid image, PDF, or ZIP.")
|
| 275 |
|
| 276 |
# Generate summary from images
|
| 277 |
+
raw_output, analysis_str, summary, invalid_list_str, notice_msg, form_filling_str = gemini_analysis(all_images, tanggal_berangkat, tanggal_pulang, api_key)
|
| 278 |
rdf = random.randint(5, 10)
|
| 279 |
time.sleep(rdf)
|
| 280 |
|
|
|
|
| 291 |
doc.save(temp_docx_path)
|
| 292 |
|
| 293 |
# Filtering the file
|
| 294 |
+
zip_file_path = process_and_zip_all_images(all_images, api_key, zip_name=f'All_PDF_Docs_{base_name}.zip')
|
| 295 |
+
return temp_docx_path, form_filling_str, zip_file_path, invalid_list_str
|
| 296 |
|
| 297 |
|
| 298 |
# Gradio UI update: add ".zip" to accepted file types
|
|
|
|
| 323 |
)
|
| 324 |
|
| 325 |
run_btn = gr.Button("π Run Analysis")
|
|
|
|
| 326 |
|
| 327 |
with gr.Row():
|
| 328 |
download_output_docx = gr.File(label="π₯ Download Summary as DOCX", visible=True)
|
| 329 |
download_valid_zip = gr.File(label="π₯ Download all PDF document in zip", visible=True)
|
| 330 |
+
|
| 331 |
+
form_filling_output = gr.Textbox(label="π FORM FILLING RESULT", lines=5)
|
| 332 |
+
invalid_list_output = gr.Textbox(label="π INVALID DOCUMENT LIST", lines=5)
|
| 333 |
|
| 334 |
run_btn.click(
|
| 335 |
fn=main_process,
|
| 336 |
inputs=[file_input, tanggal_berangkat, tanggal_pulang, api_key],
|
| 337 |
+
outputs=[download_output_docx, form_filling_output, download_valid_zip, invalid_list_output]
|
| 338 |
)
|
| 339 |
|
| 340 |
demo.launch(debug=True)
|