Corin1998 committed on
Commit
914c816
·
verified ·
1 Parent(s): 7df6a8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -14
app.py CHANGED
@@ -27,7 +27,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
27
  partial_records = []
28
  raw_texts = []
29
 
30
- # gr.Files(type="filepath") → files はパスの配列
31
  for path in files:
32
  filepath = str(path)
33
  filename = os.path.basename(filepath)
@@ -36,12 +36,11 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
36
 
37
  filetype = detect_filetype(filename, raw_bytes)
38
 
39
- # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
40
  if filetype in {"pdf", "image"}:
41
  text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
42
  else:
43
  base_text = load_doc_text(filetype, raw_bytes)
44
- # 生テキストをOpenAIへ渡して整形
45
  text = extract_text_with_openai(base_text.encode("utf-8"), filename=filename, filetype="txt")
46
 
47
  raw_texts.append({"filename": filename, "text": text})
@@ -64,7 +63,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
64
  # 3) 統合(複数ファイル→1候補者)
65
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
66
 
67
- # 4) スキル抽出(辞書/正規表現)
68
  merged_text = "\n\n".join([r["text"] for r in partial_records])
69
  skills = extract_skills(merged_text, {
70
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
@@ -80,7 +79,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
80
  # 6) 品質スコア
81
  score = compute_quality_score(merged_text, merged)
82
 
83
- # 7) 要約(300/100/1文)
84
  summaries = summarize_with_openai(merged_text)
85
 
86
  # 8) 構造化出力
@@ -111,7 +110,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
111
 
112
  anon_pdf = (result_json["candidate_id"] + ".anon.pdf", anon_pdf_bytes)
113
 
114
- # gr.Code に渡すため、JSONはすべて str にして返す
115
  return (
116
  json.dumps(result_json, ensure_ascii=False, indent=2),
117
  json.dumps(skills, ensure_ascii=False, indent=2),
@@ -132,7 +131,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
132
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
133
  file_count="multiple",
134
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
135
- type="filepath", # ★重要:Gradio 4.44 は 'filepath' or 'binary'
136
  )
137
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
138
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
@@ -143,7 +142,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
143
  out_json = gr.Code(label="統合出力 (JSON)")
144
 
145
  with gr.Tab("抽出スキル"):
146
- out_skills = gr.Code(label="スキル一覧 (JSON)") # gr.JSON はスキーマ例外のため回避
147
 
148
  with gr.Tab("品質スコア"):
149
  out_score = gr.Code(label="品質評価")
@@ -167,9 +166,5 @@ with gr.Blocks(title=APP_TITLE) as demo:
167
 
168
 
169
  if __name__ == "__main__":
170
- # HF Spaces では share=True は未対応。ローカルでのみ share=True を有効化。
171
- on_spaces = bool(os.environ.get("SPACE_ID") or os.environ.get("SYSTEM") == "spaces")
172
- if on_spaces:
173
- demo.launch(server_name="0.0.0.0", server_port=7860)
174
- else:
175
- demo.launch(share=True)
 
27
  partial_records = []
28
  raw_texts = []
29
 
30
+ # gr.Files(type="filepath") → files はパス文字列のリスト
31
  for path in files:
32
  filepath = str(path)
33
  filename = os.path.basename(filepath)
 
36
 
37
  filetype = detect_filetype(filename, raw_bytes)
38
 
39
+ # 1) テキスト抽出:画像/PDF→Vision OCR、docx/txt→生文面+整形
40
  if filetype in {"pdf", "image"}:
41
  text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
42
  else:
43
  base_text = load_doc_text(filetype, raw_bytes)
 
44
  text = extract_text_with_openai(base_text.encode("utf-8"), filename=filename, filetype="txt")
45
 
46
  raw_texts.append({"filename": filename, "text": text})
 
63
  # 3) 統合(複数ファイル→1候補者)
64
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
65
 
66
+ # 4) スキル抽出
67
  merged_text = "\n\n".join([r["text"] for r in partial_records])
68
  skills = extract_skills(merged_text, {
69
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
 
79
  # 6) 品質スコア
80
  score = compute_quality_score(merged_text, merged)
81
 
82
+ # 7) 要約
83
  summaries = summarize_with_openai(merged_text)
84
 
85
  # 8) 構造化出力
 
110
 
111
  anon_pdf = (result_json["candidate_id"] + ".anon.pdf", anon_pdf_bytes)
112
 
113
+ # gr.Code に渡すため、JSONはすべて str で返却
114
  return (
115
  json.dumps(result_json, ensure_ascii=False, indent=2),
116
  json.dumps(skills, ensure_ascii=False, indent=2),
 
131
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
132
  file_count="multiple",
133
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
134
+ type="filepath", # これが必須('file' はNG)
135
  )
136
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
137
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
 
142
  out_json = gr.Code(label="統合出力 (JSON)")
143
 
144
  with gr.Tab("抽出スキル"):
145
+ out_skills = gr.Code(label="スキル一覧 (JSON)") # gr.JSON をやめる
146
 
147
  with gr.Tab("品質スコア"):
148
  out_score = gr.Code(label="品質評価")
 
166
 
167
 
168
  if __name__ == "__main__":
169
+ # HF Spaces では share=True を使わず、引数なしでOK
170
+ demo.launch()