firmanaziz committed on
Commit
01b1ca6
·
verified ·
1 Parent(s): 93d6ef0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -55
app.py CHANGED
@@ -3,16 +3,16 @@ import google.generativeai as genai
3
  import fitz # PyMuPDF
4
  import json
5
  import os
 
6
  import urllib.parse
7
- import base64 # Diperlukan untuk client-side API call
8
 
9
- # --- KONFIGURASI API KEY (TETAP SAMA) ---
10
  API_CONFIGURED = False
11
  try:
12
- api_key = os.environ.get('GEMINI_API_KEY')
13
  if api_key:
14
  genai.configure(api_key=api_key)
15
- model = genai.GenerativeModel('gemini-flash-lite-latest')
16
  API_CONFIGURED = True
17
  print("βœ… Konfigurasi API dan model berhasil.")
18
  else:
@@ -20,7 +20,7 @@ try:
20
  except Exception as e:
21
  print(f"πŸ›‘ Terjadi error saat inisialisasi: {e}")
22
 
23
- # --- KONSTANTA BATAS TOKEN OUTPUT ---
24
  MAX_OUTPUT_TOKENS = 8192
25
 
26
  # --- FUNGSI-FUNGSI UTAMA ---
@@ -39,31 +39,50 @@ def generate_search_links(keywords):
39
  keywords_encoded = urllib.parse.quote_plus(keywords)
40
  keywords_hyphenated = keywords.lower().replace(" ", "-").replace("(", "").replace(")", "")
41
  links = {
42
- "LinkedIn": f"https://www.linkedin.com/jobs/search/?keywords={keywords_encoded}&location=Indonesia",
43
- "JobStreet": f"https://www.jobstreet.co.id/id/job-search/{keywords_hyphenated}-jobs/",
44
- "Glints": f"https://glints.com/id/opportunities/jobs/explore?keyword={keywords_encoded}",
45
- "Indeed": f"https://id.indeed.com/jobs?q={keywords_encoded}",
46
- "Google Jobs": f"https://www.google.com/search?q={keywords_encoded}+jobs+in+Indonesia&ibp=htl;jobs"
47
  }
48
  return links
49
 
50
  def parse_json_safe(text: str) -> dict:
51
- clean = text.strip()
52
- if clean.startswith("```"):
53
- parts = clean.split("```")
54
- for part in parts:
55
- candidate = part.lstrip("json").strip()
56
- if candidate.startswith("{"):
57
- clean = candidate
58
- break
59
- start = clean.find("{")
60
- end = clean.rfind("}")
 
 
 
 
 
 
 
 
 
61
  if start != -1 and end != -1 and end > start:
62
- clean = clean[start:end + 1]
63
- return json.loads(clean)
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  def log_token_usage(usage_metadata):
66
- """Log penggunaan token dari usage_metadata ke console."""
67
  if usage_metadata is None:
68
  print("⚠️ Token usage: data tidak tersedia.")
69
  return
@@ -78,7 +97,6 @@ def log_token_usage(usage_metadata):
78
  print("=" * 40)
79
 
80
  def analyze_career_path(cv_file):
81
- """Fungsi utama pipeline: Analisis CV -> Buat Laporan JSON -> Buat Link -> Gabungkan."""
82
  if not API_CONFIGURED:
83
  raise gr.Error("API Key Gemini belum terkonfigurasi. Periksa Logs aplikasi.")
84
  if cv_file is None:
@@ -86,67 +104,87 @@ def analyze_career_path(cv_file):
86
 
87
  try:
88
  print("--- Memulai Proses Analisis Karir ---")
89
-
90
  teks_cv = ekstrak_teks_dari_pdf(cv_file.name)
91
  if not teks_cv:
92
  raise gr.Error("PDF kosong atau tidak dapat dibaca.")
93
  print("βœ… Teks berhasil diekstrak.")
94
 
95
- print("2. Mengirim permintaan analisis karir ke Gemini...")
96
  prompt_analisis_karir = f"""
97
- Anda adalah seorang "Career Analyst AI". Baca teks CV dan buat laporan peluang karir dalam format JSON.
98
- Teks CV: --- {teks_cv} ---
99
- Struktur JSON yang diinginkan:
100
- - "jabatan_ideal": Jabatan paling ideal untuk kandidat.
101
- - "alasan_kecocokan": Array (list) berisi 3-4 poin MENGAPA kandidat cocok.
102
- - "deskripsi_pekerjaan": Array (list) berisi 5 poin deskripsi pekerjaan umum.
103
- - "potensi_karir": Array (list) berisi 3-4 jalur pengembangan karir.
104
- - "kisaran_gaji": Objek JSON berisi estimasi gaji untuk level "junior", "mid_level", dan "senior".
105
- - "kelebihan_tambahan": Array (list) berisi 1-2 poin saran atau kelebihan unik kandidat.
106
- Pastikan output hanya berupa JSON saja.
107
- """
108
-
109
- # βœ… Tambahan: max_output_tokens untuk membatasi token output
 
 
 
 
 
 
 
 
 
 
 
 
110
  generation_config = genai.types.GenerationConfig(
111
- response_mime_type="application/json",
112
  max_output_tokens=MAX_OUTPUT_TOKENS,
113
  )
114
  response = model.generate_content(prompt_analisis_karir, generation_config=generation_config)
115
 
116
- # βœ… Tambahan: log penggunaan token ke console
117
  log_token_usage(getattr(response, 'usage_metadata', None))
118
-
119
- print(f"πŸ“ Raw response preview: {response.text[:120]!r}")
120
- response_json = parse_json_safe(response.text)
121
- print("βœ… Laporan karir komprehensif berhasil diterima.")
122
-
123
- print("3. Membuat tautan pencarian dari hasil analisis...")
124
- keywords_from_analysis = response_json.get("jabatan_ideal", "")
125
- search_links = generate_search_links(keywords_from_analysis)
126
-
 
 
 
 
 
 
 
 
127
  response_json["tautan_pencarian"] = search_links
128
- print("βœ… Tautan pencarian ditambahkan ke JSON.")
129
 
130
  print("--- Proses Selesai ---")
131
  return response_json
132
 
 
 
133
  except Exception as e:
134
  print(f"πŸ›‘ ERROR DALAM FUNGSI ANALISIS: {e}")
135
  raise gr.Error(f"Terjadi kesalahan: {e}")
136
 
137
- # --- MEMBUAT INTERFACE GRADIO ---
138
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
139
  gr.Markdown("# πŸš€ API Analis Peluang Karir Personal")
140
  gr.Markdown("Antarmuka ini dapat digunakan untuk pengujian. Endpoint API publik tersedia di `/run/predict` untuk integrasi ke website Anda.")
141
-
142
  with gr.Row():
143
  with gr.Column(scale=1):
144
  cv_pdf = gr.File(label="Upload CV (PDF) untuk Uji Coba", file_types=[".pdf"])
145
  analyze_button = gr.Button("πŸ” Analisis Karir Saya", variant="primary")
146
-
147
  with gr.Column(scale=2):
148
  output_analysis = gr.JSON(label="Output JSON dari API")
149
-
150
  analyze_button.click(
151
  fn=analyze_career_path,
152
  inputs=[cv_pdf],
 
3
  import fitz # PyMuPDF
4
  import json
5
  import os
6
+ import re
7
  import urllib.parse
 
8
 
9
+ # --- KONFIGURASI API KEY ---
10
  API_CONFIGURED = False
11
  try:
12
+ api_key = os.environ.get('GEMINI_API_KEY')
13
  if api_key:
14
  genai.configure(api_key=api_key)
15
+ model = genai.GenerativeModel('gemma-3-1b-it') # Gemma 3 1B
16
  API_CONFIGURED = True
17
  print("βœ… Konfigurasi API dan model berhasil.")
18
  else:
 
20
  except Exception as e:
21
  print(f"πŸ›‘ Terjadi error saat inisialisasi: {e}")
22
 
23
+ # --- KONSTANTA ---
24
  MAX_OUTPUT_TOKENS = 8192
25
 
26
  # --- FUNGSI-FUNGSI UTAMA ---
 
39
  keywords_encoded = urllib.parse.quote_plus(keywords)
40
  keywords_hyphenated = keywords.lower().replace(" ", "-").replace("(", "").replace(")", "")
41
  links = {
42
+ "LinkedIn": f"https://www.linkedin.com/jobs/search/?keywords={keywords_encoded}&location=Indonesia",
43
+ "JobStreet": f"https://www.jobstreet.co.id/id/job-search/{keywords_hyphenated}-jobs/",
44
+ "Glints": f"https://glints.com/id/opportunities/jobs/explore?keyword={keywords_encoded}",
45
+ "Indeed": f"https://id.indeed.com/jobs?q={keywords_encoded}",
46
+ "Google Jobs":f"https://www.google.com/search?q={keywords_encoded}+jobs+in+Indonesia&ibp=htl;jobs"
47
  }
48
  return links
49
 
50
  def parse_json_safe(text: str) -> dict:
51
+ """
52
+ Parse JSON dari teks bebas model.
53
+ Strategi (urutan prioritas):
54
+ 1. Cari blok ```json ... ``` atau ``` ... ```
55
+ 2. Cari objek { ... } terluar
56
+ 3. Raise error jika semua gagal
57
+ """
58
+ # Strategi 1: ambil dari blok markdown code fence
59
+ fence_match = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
60
+ if fence_match:
61
+ candidate = fence_match.group(1)
62
+ try:
63
+ return json.loads(candidate)
64
+ except json.JSONDecodeError:
65
+ pass # lanjut ke strategi berikutnya
66
+
67
+ # Strategi 2: ambil objek { ... } terluar
68
+ start = text.find("{")
69
+ end = text.rfind("}")
70
  if start != -1 and end != -1 and end > start:
71
+ candidate = text[start:end + 1]
72
+ try:
73
+ return json.loads(candidate)
74
+ except json.JSONDecodeError as e:
75
+ raise ValueError(
76
+ f"Ditemukan struktur JSON tapi gagal di-parse: {e}\n"
77
+ f"Cuplikan teks: {candidate[:300]}"
78
+ )
79
+
80
+ raise ValueError(
81
+ f"Tidak ditemukan JSON valid dalam respons model.\n"
82
+ f"Cuplikan respons: {text[:300]}"
83
+ )
84
 
85
  def log_token_usage(usage_metadata):
 
86
  if usage_metadata is None:
87
  print("⚠️ Token usage: data tidak tersedia.")
88
  return
 
97
  print("=" * 40)
98
 
99
  def analyze_career_path(cv_file):
 
100
  if not API_CONFIGURED:
101
  raise gr.Error("API Key Gemini belum terkonfigurasi. Periksa Logs aplikasi.")
102
  if cv_file is None:
 
104
 
105
  try:
106
  print("--- Memulai Proses Analisis Karir ---")
107
+
108
  teks_cv = ekstrak_teks_dari_pdf(cv_file.name)
109
  if not teks_cv:
110
  raise gr.Error("PDF kosong atau tidak dapat dibaca.")
111
  print("βœ… Teks berhasil diekstrak.")
112
 
113
+ print("2. Mengirim permintaan analisis karir ke model...")
114
  prompt_analisis_karir = f"""
115
+ Anda adalah seorang "Career Analyst AI". Baca teks CV berikut dan buat laporan peluang karir.
116
+
117
+ Teks CV:
118
+ ---
119
+ {teks_cv}
120
+ ---
121
+
122
+ PENTING: Balas HANYA dengan satu blok JSON murni. Jangan tambahkan teks, penjelasan, atau komentar apapun di luar JSON.
123
+ Format output WAJIB persis seperti ini:
124
+
125
+ {{
126
+ "jabatan_ideal": "string",
127
+ "alasan_kecocokan": ["poin 1", "poin 2", "poin 3", "poin 4"],
128
+ "deskripsi_pekerjaan": ["poin 1", "poin 2", "poin 3", "poin 4", "poin 5"],
129
+ "potensi_karir": ["poin 1", "poin 2", "poin 3", "poin 4"],
130
+ "kisaran_gaji": {{
131
+ "junior": "Rp X - Rp Y / bulan",
132
+ "mid_level": "Rp X - Rp Y / bulan",
133
+ "senior": "Rp X - Rp Y / bulan"
134
+ }},
135
+ "kelebihan_tambahan": ["poin 1", "poin 2"]
136
+ }}
137
+ """
138
+
139
+ # ⚠️ Gemma 3 tidak support response_mime_type JSON β€” dihapus
140
  generation_config = genai.types.GenerationConfig(
 
141
  max_output_tokens=MAX_OUTPUT_TOKENS,
142
  )
143
  response = model.generate_content(prompt_analisis_karir, generation_config=generation_config)
144
 
 
145
  log_token_usage(getattr(response, 'usage_metadata', None))
146
+
147
+ raw_text = response.text
148
+ print(f"πŸ“ Raw response preview: {raw_text[:200]!r}")
149
+
150
+ # Parse manual β€” tidak bergantung pada response_mime_type
151
+ try:
152
+ response_json = parse_json_safe(raw_text)
153
+ print("βœ… JSON berhasil di-parse.")
154
+ except ValueError as parse_err:
155
+ print(f"πŸ›‘ Gagal parse JSON: {parse_err}")
156
+ raise gr.Error(
157
+ f"Model tidak menghasilkan JSON yang valid.\n"
158
+ f"Detail: {parse_err}"
159
+ )
160
+
161
+ print("3. Membuat tautan pencarian...")
162
+ search_links = generate_search_links(response_json.get("jabatan_ideal", ""))
163
  response_json["tautan_pencarian"] = search_links
164
+ print("βœ… Tautan pencarian ditambahkan.")
165
 
166
  print("--- Proses Selesai ---")
167
  return response_json
168
 
169
+ except gr.Error:
170
+ raise
171
  except Exception as e:
172
  print(f"πŸ›‘ ERROR DALAM FUNGSI ANALISIS: {e}")
173
  raise gr.Error(f"Terjadi kesalahan: {e}")
174
 
175
+ # --- INTERFACE GRADIO ---
176
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
177
  gr.Markdown("# πŸš€ API Analis Peluang Karir Personal")
178
  gr.Markdown("Antarmuka ini dapat digunakan untuk pengujian. Endpoint API publik tersedia di `/run/predict` untuk integrasi ke website Anda.")
179
+
180
  with gr.Row():
181
  with gr.Column(scale=1):
182
  cv_pdf = gr.File(label="Upload CV (PDF) untuk Uji Coba", file_types=[".pdf"])
183
  analyze_button = gr.Button("πŸ” Analisis Karir Saya", variant="primary")
184
+
185
  with gr.Column(scale=2):
186
  output_analysis = gr.JSON(label="Output JSON dari API")
187
+
188
  analyze_button.click(
189
  fn=analyze_career_path,
190
  inputs=[cv_pdf],