Corin1998 committed on
Commit
f7d4d1c
·
verified ·
1 Parent(s): a8c0580

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -33,7 +33,7 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
33
  raw_bytes = f.read()
34
  filetype = detect_filetype(filename, raw_bytes)
35
 
36
- # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
37
  if filetype in {"pdf", "image"}:
38
  text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
39
  else:
@@ -42,7 +42,7 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
42
 
43
  raw_texts.append({"filename": filename, "text": text})
44
 
45
- # 2) OpenAIでセクション構造化 → ルール正規化
46
  structured = structure_with_openai(text)
47
  normalized = normalize_resume({
48
  "work_experience": structured.get("work_experience_raw", ""),
@@ -57,10 +57,10 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
57
  "normalized": normalized,
58
  })
59
 
60
- # 3) 統合(複数ファイル→1候補者)
61
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
62
 
63
- # 4) スキル抽出(辞書/正規表現)
64
  merged_text = "\n\n".join([r["text"] for r in partial_records])
65
  skills = extract_skills(merged_text, {
66
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
@@ -69,17 +69,17 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
69
  "skills": ", ".join(merged.get("skills", [])),
70
  })
71
 
72
- # 5) 匿名化
73
  anonymized_text, anon_map = anonymize_text(merged_text)
74
  anon_pdf_bytes = render_anonymized_pdf(anonymized_text)
75
 
76
- # 6) 品質スコア
77
  score = compute_quality_score(merged_text, merged)
78
 
79
- # 7) 要約(300/100/1文)
80
  summaries = summarize_with_openai(merged_text)
81
 
82
- # 8) 構造化出力
83
  result_json = {
84
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
85
  "files": [os.path.basename(p) for p in filepaths],
@@ -91,7 +91,7 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
91
  "notes": additional_notes,
92
  }
93
 
94
- # 9) HF Datasets 保存
95
  dataset_repo = os.environ.get("DATASET_REPO")
96
  commit_info = None
97
  if dataset_repo:
@@ -107,14 +107,15 @@ def process_resumes(filepaths, candidate_id: str, additional_notes: str = ""):
107
 
108
  anon_pdf = (result_json["candidate_id"] + ".anon.pdf", anon_pdf_bytes)
109
 
 
110
  return (
111
- json.dumps(result_json, ensure_ascii=False, indent=2),
112
- json.dumps(skills, ensure_ascii=False, indent=2), # Code出力に合わせて文字列化
113
- json.dumps(score, ensure_ascii=False, indent=2),
114
  summaries["300chars"],
115
  summaries["100chars"],
116
  summaries["onesent"],
117
- anon_pdf,
118
  json.dumps(commit_info or {"status": "skipped (DATASET_REPO not set)"}, ensure_ascii=False, indent=2),
119
  )
120
 
@@ -127,7 +128,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
127
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
128
  file_count="multiple",
129
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
130
- type="filepath", # ← ここが重要('file' は不可)
131
  )
132
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
133
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
@@ -138,10 +139,10 @@ with gr.Blocks(title=APP_TITLE) as demo:
138
  out_json = gr.Code(label="統合出力 (JSON)")
139
 
140
  with gr.Tab("抽出スキル"):
141
- out_skills = gr.Code(label="スキル一覧 (JSON)") # ← gr.JSON は型推論で落ちやすいので Code に
142
 
143
  with gr.Tab("品質スコア"):
144
- out_score = gr.Code(label="品質評価")
145
 
146
  with gr.Tab("要約 (300/100/1文)"):
147
  out_sum_300 = gr.Textbox(label="300字要約")
@@ -160,6 +161,6 @@ with gr.Blocks(title=APP_TITLE) as demo:
160
  outputs=[out_json, out_skills, out_score, out_sum_300, out_sum_100, out_sum_1, out_pdf, out_commit],
161
  )
162
 
163
-
164
  if __name__ == "__main__":
165
- demo.launch()
 
 
33
  raw_bytes = f.read()
34
  filetype = detect_filetype(filename, raw_bytes)
35
 
36
+ # 1) テキスト抽出
37
  if filetype in {"pdf", "image"}:
38
  text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
39
  else:
 
42
 
43
  raw_texts.append({"filename": filename, "text": text})
44
 
45
+ # 2) 構造化 → 3) 正規化
46
  structured = structure_with_openai(text)
47
  normalized = normalize_resume({
48
  "work_experience": structured.get("work_experience_raw", ""),
 
57
  "normalized": normalized,
58
  })
59
 
60
+ # 4) 統合
61
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
62
 
63
+ # 5) スキル抽出
64
  merged_text = "\n\n".join([r["text"] for r in partial_records])
65
  skills = extract_skills(merged_text, {
66
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
 
69
  "skills": ", ".join(merged.get("skills", [])),
70
  })
71
 
72
+ # 6) 匿名化
73
  anonymized_text, anon_map = anonymize_text(merged_text)
74
  anon_pdf_bytes = render_anonymized_pdf(anonymized_text)
75
 
76
+ # 7) 品質スコア
77
  score = compute_quality_score(merged_text, merged)
78
 
79
+ # 8) 要約
80
  summaries = summarize_with_openai(merged_text)
81
 
82
+ # 9) 構造化出力
83
  result_json = {
84
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
85
  "files": [os.path.basename(p) for p in filepaths],
 
91
  "notes": additional_notes,
92
  }
93
 
94
+ # 10) HF Datasets 保存
95
  dataset_repo = os.environ.get("DATASET_REPO")
96
  commit_info = None
97
  if dataset_repo:
 
107
 
108
  anon_pdf = (result_json["candidate_id"] + ".anon.pdf", anon_pdf_bytes)
109
 
110
+ # 重要:gradioのAPIスキーマ生成バグ回避のため、dictはすべて文字列で返す
111
  return (
112
+ json.dumps(result_json, ensure_ascii=False, indent=2), # out_json -> Code(str)
113
+ json.dumps(skills, ensure_ascii=False, indent=2), # out_skills -> Code(str)
114
+ json.dumps(score, ensure_ascii=False, indent=2), # out_score -> Code(str)
115
  summaries["300chars"],
116
  summaries["100chars"],
117
  summaries["onesent"],
118
+ anon_pdf, # File(tuple)
119
  json.dumps(commit_info or {"status": "skipped (DATASET_REPO not set)"}, ensure_ascii=False, indent=2),
120
  )
121
 
 
128
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
129
  file_count="multiple",
130
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
131
+ type="filepath", # ← 'file' は 4.44 系で非推奨/不可
132
  )
133
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
134
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
 
139
  out_json = gr.Code(label="統合出力 (JSON)")
140
 
141
  with gr.Tab("抽出スキル"):
142
+ out_skills = gr.Code(label="スキル一覧 (JSON)") # ← gr.JSON はやめる
143
 
144
  with gr.Tab("品質スコア"):
145
+ out_score = gr.Code(label="品質評価 (JSON)")
146
 
147
  with gr.Tab("要約 (300/100/1文)"):
148
  out_sum_300 = gr.Textbox(label="300字要約")
 
161
  outputs=[out_json, out_skills, out_score, out_sum_300, out_sum_100, out_sum_1, out_pdf, out_commit],
162
  )
163
 
 
164
  if __name__ == "__main__":
165
+ # ローカル不可環境に対応
166
+ demo.launch(share=True, server_name="0.0.0.0")