Corin1998 committed on
Commit
abef298
·
verified ·
1 Parent(s): 8d4bc07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -16
app.py CHANGED
@@ -31,19 +31,17 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
31
  raw_bytes = f.read()
32
  filetype = detect_filetype(f.name, raw_bytes)
33
 
34
- # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
35
  if filetype in {"pdf", "image"}:
36
  text = extract_text_with_openai(raw_bytes, filename=f.name, filetype=filetype)
37
  else:
38
  base_text = load_doc_text(filetype, raw_bytes)
39
- # 生テキストをそのままOpenAIへ渡し、軽く整形した全文を返す
40
  text = extract_text_with_openai(base_text.encode("utf-8"), filename=f.name, filetype="txt")
41
 
42
  raw_texts.append({"filename": f.name, "text": text})
43
 
44
- # 2) OpenAIでセクション構造化
45
  structured = structure_with_openai(text)
46
- # 念のためルールベース正規化も適用(期間抽出など補助)
47
  normalized = normalize_resume({
48
  "work_experience": structured.get("work_experience_raw", ""),
49
  "education": structured.get("education_raw", ""),
@@ -57,10 +55,10 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
57
  "normalized": normalized,
58
  })
59
 
60
- # 3) 統合(複数ファイル→1候補者)
61
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
62
 
63
- # 4) スキル抽出(辞書/正規表現)
64
  merged_text = "\n\n".join([r["text"] for r in partial_records])
65
  skills = extract_skills(merged_text, {
66
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
@@ -76,10 +74,10 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
76
  # 6) 品質スコア
77
  score = compute_quality_score(merged_text, merged)
78
 
79
- # 7) 要約(300/100/1文)
80
  summaries = summarize_with_openai(merged_text)
81
 
82
- # 8) 構造化出力
83
  result_json = {
84
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
85
  "files": [f.name for f in files],
@@ -91,7 +89,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
91
  "notes": additional_notes,
92
  }
93
 
94
- # 9) HF Datasets 保存
95
  dataset_repo = os.environ.get("DATASET_REPO")
96
  commit_info = None
97
  if dataset_repo:
@@ -109,7 +107,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
109
 
110
  return (
111
  json.dumps(result_json, ensure_ascii=False, indent=2),
112
- skills,
113
  json.dumps(score, ensure_ascii=False, indent=2),
114
  summaries["300chars"],
115
  summaries["100chars"],
@@ -127,7 +125,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
127
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
128
  file_count="multiple",
129
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
130
- type="file" # ※読み取りコードが .read() 前提のため file のまま
131
  )
132
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
133
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
@@ -138,11 +136,11 @@ with gr.Blocks(title=APP_TITLE) as demo:
138
  out_json = gr.Code(label="統合出力 (JSON)")
139
 
140
  with gr.Tab("抽出スキル"):
141
- # gr.JSON → gr.Code に変更(Gradio 4.44.0 の schema 解析バグ回避)
142
- out_skills = gr.Code(label="スキル一覧(JSON表示)")
143
 
144
  with gr.Tab("品質スコア"):
145
- out_score = gr.Code(label="品質評価")
146
 
147
  with gr.Tab("要約 (300/100/1文)"):
148
  out_sum_300 = gr.Textbox(label="300字要約")
@@ -153,7 +151,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
153
  out_pdf = gr.File(label="匿名PDFダウンロード")
154
 
155
  with gr.Tab("Datasets 保存ログ"):
156
- out_commit = gr.Code(label="コミット情報")
157
 
158
  run_btn.click(
159
  process_resumes,
@@ -163,5 +161,5 @@ with gr.Blocks(title=APP_TITLE) as demo:
163
 
164
 
165
  if __name__ == "__main__":
166
- # HF Spaces localhost アクセス時の ValueError を回避
167
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
31
  raw_bytes = f.read()
32
  filetype = detect_filetype(f.name, raw_bytes)
33
 
34
+ # 1) テキスト抽出
35
  if filetype in {"pdf", "image"}:
36
  text = extract_text_with_openai(raw_bytes, filename=f.name, filetype=filetype)
37
  else:
38
  base_text = load_doc_text(filetype, raw_bytes)
 
39
  text = extract_text_with_openai(base_text.encode("utf-8"), filename=f.name, filetype="txt")
40
 
41
  raw_texts.append({"filename": f.name, "text": text})
42
 
43
+ # 2) 構造化
44
  structured = structure_with_openai(text)
 
45
  normalized = normalize_resume({
46
  "work_experience": structured.get("work_experience_raw", ""),
47
  "education": structured.get("education_raw", ""),
 
55
  "normalized": normalized,
56
  })
57
 
58
+ # 3) 統合
59
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
60
 
61
+ # 4) スキル抽出
62
  merged_text = "\n\n".join([r["text"] for r in partial_records])
63
  skills = extract_skills(merged_text, {
64
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
 
74
  # 6) 品質スコア
75
  score = compute_quality_score(merged_text, merged)
76
 
77
+ # 7) 要約
78
  summaries = summarize_with_openai(merged_text)
79
 
80
+ # 8) まとめ
81
  result_json = {
82
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
83
  "files": [f.name for f in files],
 
89
  "notes": additional_notes,
90
  }
91
 
92
+ # 9) HF Datasets 保存(任意)
93
  dataset_repo = os.environ.get("DATASET_REPO")
94
  commit_info = None
95
  if dataset_repo:
 
107
 
108
  return (
109
  json.dumps(result_json, ensure_ascii=False, indent=2),
110
+ json.dumps(skills, ensure_ascii=False, indent=2),
111
  json.dumps(score, ensure_ascii=False, indent=2),
112
  summaries["300chars"],
113
  summaries["100chars"],
 
125
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
126
  file_count="multiple",
127
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
128
+ type="file"
129
  )
130
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
131
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
 
136
  out_json = gr.Code(label="統合出力 (JSON)")
137
 
138
  with gr.Tab("抽出スキル"):
139
+ # Gradio 4.44.0 の schema まわりのバグを避けるため JSON 表示は Code に
140
+ out_skills = gr.Code(label="スキル一覧(JSON)")
141
 
142
  with gr.Tab("品質スコア"):
143
+ out_score = gr.Code(label="品質評価(JSON)")
144
 
145
  with gr.Tab("要約 (300/100/1文)"):
146
  out_sum_300 = gr.Textbox(label="300字要約")
 
151
  out_pdf = gr.File(label="匿名PDFダウンロード")
152
 
153
  with gr.Tab("Datasets 保存ログ"):
154
+ out_commit = gr.Code(label="コミット情報(JSON)")
155
 
156
  run_btn.click(
157
  process_resumes,
 
161
 
162
 
163
  if __name__ == "__main__":
164
+ # HF Spaces での公開実行(localhost アクセス不可対策)
165
  demo.launch(server_name="0.0.0.0", server_port=7860)