firmanaziz committed on
Commit
4298ccd
·
verified ·
1 Parent(s): 01665a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -130
app.py CHANGED
@@ -4,25 +4,24 @@ import fitz # PyMuPDF
4
  import json
5
  import os
6
  import urllib.parse
 
7
 
8
- # --- KONFIGURASI API KEY ---
9
  API_CONFIGURED = False
10
  try:
11
- api_key = os.environ.get('GEMINI_API_KEY')
12
  if api_key:
13
  genai.configure(api_key=api_key)
14
- # Model paling murah & tersedia di Free Tier
15
- model = genai.GenerativeModel('models/gemini-2.5-flash')
16
  API_CONFIGURED = True
17
- print("βœ… Konfigurasi API dan model (gemini-2.5-flash) berhasil.")
18
  else:
19
  print("πŸ›‘ Secret 'GEMINI_API_KEY' tidak ditemukan.")
20
  except Exception as e:
21
  print(f"πŸ›‘ Terjadi error saat inisialisasi: {e}")
22
 
23
- # --- KONSTANTA TOKEN ---
24
- MAX_OUTPUT_TOKENS = 90000 # Batas token output (hemat kuota)
25
- MAX_INPUT_CHARS = 12000 # Batas karakter teks CV agar tidak meledak input token
26
 
27
  # --- FUNGSI-FUNGSI UTAMA ---
28
 
@@ -37,45 +36,19 @@ def ekstrak_teks_dari_pdf(path_file_pdf):
37
  def generate_search_links(keywords):
38
  if not keywords:
39
  return {}
40
- keywords_encoded = urllib.parse.quote_plus(keywords)
41
  keywords_hyphenated = keywords.lower().replace(" ", "-").replace("(", "").replace(")", "")
42
- return {
43
- "LinkedIn" : f"https://www.linkedin.com/jobs/search/?keywords={keywords_encoded}&location=Indonesia",
44
- "JobStreet" : f"https://www.jobstreet.co.id/id/job-search/{keywords_hyphenated}-jobs/",
45
- "Glints" : f"https://glints.com/id/opportunities/jobs/explore?keyword={keywords_encoded}",
46
- "Indeed" : f"https://id.indeed.com/jobs?q={keywords_encoded}",
47
  "Google Jobs": f"https://www.google.com/search?q={keywords_encoded}+jobs+in+Indonesia&ibp=htl;jobs"
48
  }
49
-
50
- def format_token_info(usage_metadata) -> str:
51
- """Mengubah usage_metadata Gemini menjadi tabel markdown yang rapi."""
52
- if usage_metadata is None:
53
- return "ℹ️ Data penggunaan token tidak tersedia."
54
-
55
- prompt_tokens = getattr(usage_metadata, 'prompt_token_count', 'N/A')
56
- candidate_tokens = getattr(usage_metadata, 'candidates_token_count', 'N/A')
57
- total_tokens = getattr(usage_metadata, 'total_token_count', 'N/A')
58
-
59
- def fmt(val):
60
- return f"{val:,}" if isinstance(val, int) else str(val)
61
-
62
- lines = [
63
- "---",
64
- "### πŸ“Š Penggunaan Token β€” gemini-2.0-flash",
65
- "| Kategori | Jumlah |",
66
- "|---|---|",
67
- f"| πŸ”Ό Input (prompt) | {fmt(prompt_tokens)} token |",
68
- f"| πŸ”½ Output (response) | {fmt(candidate_tokens)} token |",
69
- f"| **Total** | **{fmt(total_tokens)} token** |",
70
- f"| βš™οΈ Limit output dikonfigurasi | {MAX_OUTPUT_TOKENS:,} token |",
71
- ]
72
- return "\n".join(lines)
73
 
74
  def parse_json_safe(text: str) -> dict:
75
- """Parse JSON dari respons Gemini secara robust β€” tangani markdown fences & teks ekstra."""
76
  clean = text.strip()
77
-
78
- # Hapus markdown code fences: ```json...``` atau ```...```
79
  if clean.startswith("```"):
80
  parts = clean.split("```")
81
  for part in parts:
@@ -83,17 +56,29 @@ def parse_json_safe(text: str) -> dict:
83
  if candidate.startswith("{"):
84
  clean = candidate
85
  break
86
-
87
- # Ambil substring dari { pertama sampai } terakhir
88
  start = clean.find("{")
89
  end = clean.rfind("}")
90
  if start != -1 and end != -1 and end > start:
91
  clean = clean[start:end + 1]
92
-
93
  return json.loads(clean)
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  def analyze_career_path(cv_file):
96
- """Pipeline utama: Analisis CV β†’ Laporan JSON β†’ Link β†’ Gabung + Info Token."""
97
  if not API_CONFIGURED:
98
  raise gr.Error("API Key Gemini belum terkonfigurasi. Periksa Logs aplikasi.")
99
  if cv_file is None:
@@ -101,109 +86,71 @@ def analyze_career_path(cv_file):
101
 
102
  try:
103
  print("--- Memulai Proses Analisis Karir ---")
104
-
105
- # 1. Ekstrak teks PDF
106
  teks_cv = ekstrak_teks_dari_pdf(cv_file.name)
107
- if not teks_cv or not teks_cv.strip():
108
- raise gr.Error("PDF kosong atau tidak dapat dibaca (kemungkinan file scan/gambar).")
109
-
110
- # Potong teks CV agar tidak melebihi batas input
111
- if len(teks_cv) > MAX_INPUT_CHARS:
112
- teks_cv = teks_cv[:MAX_INPUT_CHARS]
113
- print(f"⚠️ Teks CV dipotong hingga {MAX_INPUT_CHARS} karakter.")
114
- print(f"βœ… Teks berhasil diekstrak ({len(teks_cv)} karakter).")
115
-
116
- # 2. Kirim ke Gemini
117
- print("2. Mengirim permintaan ke gemini-2.0-flash...")
118
- prompt = f"""
119
- Anda adalah "Career Analyst AI". Baca CV berikut dan hasilkan laporan karir ringkas dalam format JSON.
120
-
121
- Teks CV:
122
- ---
123
- {teks_cv}
124
- ---
125
-
126
- Hasilkan JSON dengan PERSIS struktur berikut, jawab singkat dan padat:
127
- {{
128
- "jabatan_ideal": "<string>",
129
- "alasan_kecocokan": ["<poin 1>", "<poin 2>", "<poin 3>"],
130
- "deskripsi_pekerjaan": ["<poin 1>", "<poin 2>", "<poin 3>", "<poin 4>"],
131
- "potensi_karir": ["<jalur 1>", "<jalur 2>", "<jalur 3>"],
132
- "kisaran_gaji": {{
133
- "junior": "<estimasi IDR/bulan>",
134
- "mid_level": "<estimasi IDR/bulan>",
135
- "senior": "<estimasi IDR/bulan>"
136
- }},
137
- "kelebihan_tambahan": ["<saran 1>", "<saran 2>"]
138
- }}
139
-
140
- PENTING: Output HANYA JSON di atas. Tidak ada teks, penjelasan, atau markdown di luar JSON.
141
- """
142
-
143
  generation_config = genai.types.GenerationConfig(
144
  response_mime_type="application/json",
145
  max_output_tokens=MAX_OUTPUT_TOKENS,
146
- temperature=0.3,
147
  )
148
-
149
- response = model.generate_content(prompt, generation_config=generation_config)
150
-
151
- # 3. Parse JSON secara robust
152
- raw_text = response.text
153
- print(f"πŸ“ Raw response preview: {raw_text[:150]!r}")
154
-
155
- try:
156
- response_json = parse_json_safe(raw_text)
157
- except json.JSONDecodeError as je:
158
- print(f"πŸ›‘ JSON parse gagal: {je}")
159
- print(f"πŸ›‘ Raw text lengkap:\n{raw_text}")
160
- raise gr.Error(
161
- f"Respons Gemini bukan JSON valid. "
162
- f"Pastikan CV berisi teks yang bisa dibaca (bukan scan/gambar). "
163
- f"Detail: {je}"
164
- )
165
-
166
- print("βœ… Laporan karir berhasil di-parse.")
167
-
168
- # 4. Tambahkan link pencarian
169
- keywords = response_json.get("jabatan_ideal", "")
170
- response_json["tautan_pencarian"] = generate_search_links(keywords)
171
- print("βœ… Tautan pencarian ditambahkan.")
172
-
173
- # 5. Info token
174
- token_info = format_token_info(getattr(response, 'usage_metadata', None))
175
- print(f"πŸ“Š Usage metadata: {getattr(response, 'usage_metadata', 'N/A')}")
176
 
177
  print("--- Proses Selesai ---")
178
- return response_json, token_info
179
 
180
- except gr.Error:
181
- raise # teruskan gr.Error apa adanya tanpa dibungkus lagi
182
  except Exception as e:
183
- print(f"πŸ›‘ ERROR TIDAK TERDUGA: {e}")
184
- raise gr.Error(f"Terjadi kesalahan tidak terduga: {e}")
185
 
186
- # --- INTERFACE GRADIO ---
187
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
188
- gr.Markdown("# πŸš€ Analis Peluang Karir Personal")
189
- gr.Markdown(
190
- "Powered by **Gemini 2.0 Flash** β€” model tercepat & termurah di Free Tier. \n"
191
- f"βš™οΈ Batas output: **{MAX_OUTPUT_TOKENS} token** per analisis."
192
- )
193
-
194
  with gr.Row():
195
  with gr.Column(scale=1):
196
- cv_pdf = gr.File(label="πŸ“„ Upload CV (PDF)", file_types=[".pdf"])
197
  analyze_button = gr.Button("πŸ” Analisis Karir Saya", variant="primary")
198
-
199
  with gr.Column(scale=2):
200
- output_analysis = gr.JSON(label="πŸ“‹ Hasil Analisis (JSON)")
201
- token_display = gr.Markdown(label="πŸ“Š Info Token", value="*Belum ada analisis.*")
202
-
203
  analyze_button.click(
204
  fn=analyze_career_path,
205
  inputs=[cv_pdf],
206
- outputs=[output_analysis, token_display],
207
  show_progress='full'
208
  )
209
 
 
4
  import json
5
  import os
6
  import urllib.parse
7
+ import base64 # Diperlukan untuk client-side API call
8
 
9
+ # --- KONFIGURASI API KEY (TETAP SAMA) ---
10
# --- Gemini API configuration ---
# API_CONFIGURED is the flag the request handler checks before using `model`.
API_CONFIGURED = False
# Bind `model` up front so the name always exists, even when the key is
# missing or initialisation fails; it stays None unless setup succeeds.
model = None
try:
    api_key = os.environ.get('GEMINI_API_KEY')
    if api_key:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.5-flash-lite')
        API_CONFIGURED = True
        print("✅ Konfigurasi API dan model berhasil.")
    else:
        print("🛑 Secret 'GEMINI_API_KEY' tidak ditemukan.")
except Exception as e:
    # Top-level boundary: report the failure and leave API_CONFIGURED False
    # instead of aborting module import.
    print(f"🛑 Terjadi error saat inisialisasi: {e}")

# --- Output token limit ---
# Passed as max_output_tokens when building the generation config.
MAX_OUTPUT_TOKENS = 8192
 
25
 
26
  # --- FUNGSI-FUNGSI UTAMA ---
27
 
 
36
def generate_search_links(keywords):
    """Build job-search URLs for the given job-title keywords.

    Args:
        keywords: Job title or search phrase. ``None``, an empty string or a
            whitespace-only string yields no links.

    Returns:
        dict: Site name -> search URL for LinkedIn, JobStreet, Glints, Indeed
        and Google Jobs, or an empty dict when there is nothing to search for.
    """
    if not keywords:
        return {}
    keywords = keywords.strip()
    if not keywords:
        # Whitespace-only input would otherwise produce empty-query links.
        return {}

    keywords_encoded = urllib.parse.quote_plus(keywords)

    # JobStreet embeds the keywords in the URL path, so every run of
    # non-alphanumeric characters ("/", "&", parentheses, spaces, ...) is
    # collapsed into a single hyphen; a raw "/" would add a path segment.
    slug_source = "".join(ch if ch.isalnum() else " " for ch in keywords.lower())
    keywords_hyphenated = urllib.parse.quote("-".join(slug_source.split()))

    links = {
        "LinkedIn": f"https://www.linkedin.com/jobs/search/?keywords={keywords_encoded}&location=Indonesia",
        "JobStreet": f"https://www.jobstreet.co.id/id/job-search/{keywords_hyphenated}-jobs/",
        "Glints": f"https://glints.com/id/opportunities/jobs/explore?keyword={keywords_encoded}",
        "Indeed": f"https://id.indeed.com/jobs?q={keywords_encoded}",
        "Google Jobs": f"https://www.google.com/search?q={keywords_encoded}+jobs+in+Indonesia&ibp=htl;jobs",
    }
    return links
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def parse_json_safe(text: str) -> dict:
 
51
  clean = text.strip()
 
 
52
  if clean.startswith("```"):
53
  parts = clean.split("```")
54
  for part in parts:
 
56
  if candidate.startswith("{"):
57
  clean = candidate
58
  break
 
 
59
  start = clean.find("{")
60
  end = clean.rfind("}")
61
  if start != -1 and end != -1 and end > start:
62
  clean = clean[start:end + 1]
 
63
  return json.loads(clean)
64
 
65
def log_token_usage(usage_metadata):
    """Print the token counts carried by a response's usage metadata.

    Args:
        usage_metadata: Object exposing ``prompt_token_count``,
            ``candidates_token_count`` and ``total_token_count`` attributes,
            or ``None`` when no usage data is available. Any attribute that
            is missing is reported as ``'N/A'``.

    Returns:
        None. The report is written to standard output only.
    """
    if usage_metadata is None:
        print("⚠️ Token usage: data tidak tersedia.")
        return

    prompt_tokens = getattr(usage_metadata, 'prompt_token_count', 'N/A')
    candidate_tokens = getattr(usage_metadata, 'candidates_token_count', 'N/A')
    total_tokens = getattr(usage_metadata, 'total_token_count', 'N/A')

    print("=" * 40)
    print("📊 TOKEN USAGE")
    print(f"  🔼 Input  (prompt)  : {prompt_tokens}")
    # The configured cap is shown next to the output count for comparison.
    print(f"  🔽 Output (response): {candidate_tokens} [limit: {MAX_OUTPUT_TOKENS}]")
    print(f"  ➕ Total            : {total_tokens}")
    print("=" * 40)
79
+
80
  def analyze_career_path(cv_file):
81
+ """Fungsi utama pipeline: Analisis CV -> Buat Laporan JSON -> Buat Link -> Gabungkan."""
82
  if not API_CONFIGURED:
83
  raise gr.Error("API Key Gemini belum terkonfigurasi. Periksa Logs aplikasi.")
84
  if cv_file is None:
 
86
 
87
  try:
88
  print("--- Memulai Proses Analisis Karir ---")
89
+
 
90
  teks_cv = ekstrak_teks_dari_pdf(cv_file.name)
91
+ if not teks_cv:
92
+ raise gr.Error("PDF kosong atau tidak dapat dibaca.")
93
+ print("βœ… Teks berhasil diekstrak.")
94
+
95
+ print("2. Mengirim permintaan analisis karir ke Gemini...")
96
+ prompt_analisis_karir = f"""
97
+ Anda adalah seorang "Career Analyst AI". Baca teks CV dan buat laporan peluang karir dalam format JSON.
98
+ Teks CV: --- {teks_cv} ---
99
+ Struktur JSON yang diinginkan:
100
+ - "jabatan_ideal": Jabatan paling ideal untuk kandidat.
101
+ - "alasan_kecocokan": Array (list) berisi 3-4 poin MENGAPA kandidat cocok.
102
+ - "deskripsi_pekerjaan": Array (list) berisi 5 poin deskripsi pekerjaan umum.
103
+ - "potensi_karir": Array (list) berisi 3-4 jalur pengembangan karir.
104
+ - "kisaran_gaji": Objek JSON berisi estimasi gaji untuk level "junior", "mid_level", dan "senior".
105
+ - "kelebihan_tambahan": Array (list) berisi 1-2 poin saran atau kelebihan unik kandidat.
106
+ Pastikan output hanya berupa JSON saja.
107
+ """
108
+
109
+ # βœ… Tambahan: max_output_tokens untuk membatasi token output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  generation_config = genai.types.GenerationConfig(
111
  response_mime_type="application/json",
112
  max_output_tokens=MAX_OUTPUT_TOKENS,
 
113
  )
114
+ response = model.generate_content(prompt_analisis_karir, generation_config=generation_config)
115
+
116
+ # βœ… Tambahan: log penggunaan token ke console
117
+ log_token_usage(getattr(response, 'usage_metadata', None))
118
+
119
+ print(f"πŸ“ Raw response preview: {response.text[:120]!r}")
120
+ response_json = parse_json_safe(response.text)
121
+ print("βœ… Laporan karir komprehensif berhasil diterima.")
122
+
123
+ print("3. Membuat tautan pencarian dari hasil analisis...")
124
+ keywords_from_analysis = response_json.get("jabatan_ideal", "")
125
+ search_links = generate_search_links(keywords_from_analysis)
126
+
127
+ response_json["tautan_pencarian"] = search_links
128
+ print("βœ… Tautan pencarian ditambahkan ke JSON.")
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  print("--- Proses Selesai ---")
131
+ return response_json
132
 
 
 
133
  except Exception as e:
134
+ print(f"πŸ›‘ ERROR DALAM FUNGSI ANALISIS: {e}")
135
+ raise gr.Error(f"Terjadi kesalahan: {e}")
136
 
137
+ # --- MEMBUAT INTERFACE GRADIO ---
138
# Test UI: one PDF upload plus a button on the left, the JSON result on the
# right. The button click runs analyze_career_path on the uploaded file.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 API Analis Peluang Karir Personal")
    gr.Markdown("Antarmuka ini dapat digunakan untuk pengujian. Endpoint API publik tersedia di `/run/predict` untuk integrasi ke website Anda.")

    with gr.Row():
        with gr.Column(scale=1):
            cv_pdf = gr.File(label="Upload CV (PDF) untuk Uji Coba", file_types=[".pdf"])
            analyze_button = gr.Button("🔍 Analisis Karir Saya", variant="primary")

        with gr.Column(scale=2):
            output_analysis = gr.JSON(label="Output JSON dari API")

    # Registered inside the Blocks context so the event is attached to `demo`.
    analyze_button.click(
        fn=analyze_career_path,
        inputs=[cv_pdf],
        outputs=[output_analysis],
        show_progress='full'
    )
156