Corin1998 committed on
Commit
d04444a
·
verified ·
1 Parent(s): 2ad7640

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -27,7 +27,8 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
27
  partial_records = []
28
  raw_texts = []
29
 
30
- for path in files: # 'filepath' なので文字列パス
 
31
  filepath = str(path)
32
  filename = os.path.basename(filepath)
33
  with open(filepath, "rb") as fp:
@@ -35,7 +36,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
35
 
36
  filetype = detect_filetype(filename, raw_bytes)
37
 
38
- # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
39
  if filetype in {"pdf", "image"}:
40
  text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
41
  else:
@@ -44,7 +45,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
44
 
45
  raw_texts.append({"filename": filename, "text": text})
46
 
47
- # 2) OpenAIでセクション構造化ルール正規化
48
  structured = structure_with_openai(text)
49
  normalized = normalize_resume({
50
  "work_experience": structured.get("work_experience_raw", ""),
@@ -59,10 +60,10 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
59
  "normalized": normalized,
60
  })
61
 
62
- # 3) 統合(複数ファイル→1候補者)
63
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
64
 
65
- # 4) スキル抽出(辞書/正規表現)
66
  merged_text = "\n\n".join([r["text"] for r in partial_records])
67
  skills = extract_skills(merged_text, {
68
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
@@ -78,7 +79,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
78
  # 6) 品質スコア
79
  score = compute_quality_score(merged_text, merged)
80
 
81
- # 7) 要約(300/100/1文)
82
  summaries = summarize_with_openai(merged_text)
83
 
84
  # 8) 構造化出力
@@ -111,7 +112,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
111
 
112
  return (
113
  json.dumps(result_json, ensure_ascii=False, indent=2),
114
- skills,
115
  json.dumps(score, ensure_ascii=False, indent=2),
116
  summaries["300chars"],
117
  summaries["100chars"],
@@ -129,7 +130,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
129
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
130
  file_count="multiple",
131
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
132
- type="filepath", # ← 修正点
133
  )
134
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
135
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
@@ -140,7 +141,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
140
  out_json = gr.Code(label="統合出力 (JSON)")
141
 
142
  with gr.Tab("抽出スキル"):
143
- out_skills = gr.JSON(label="スキル一覧")
144
 
145
  with gr.Tab("品質スコア"):
146
  out_score = gr.Code(label="品質評価")
@@ -164,4 +165,5 @@ with gr.Blocks(title=APP_TITLE) as demo:
164
 
165
 
166
  if __name__ == "__main__":
167
- demo.launch()
 
 
27
  partial_records = []
28
  raw_texts = []
29
 
30
+ # files 'filepath' 前提
31
+ for path in files:
32
  filepath = str(path)
33
  filename = os.path.basename(filepath)
34
  with open(filepath, "rb") as fp:
 
36
 
37
  filetype = detect_filetype(filename, raw_bytes)
38
 
39
+ # 1) 抽出
40
  if filetype in {"pdf", "image"}:
41
  text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
42
  else:
 
45
 
46
  raw_texts.append({"filename": filename, "text": text})
47
 
48
+ # 2) 構造化正規化
49
  structured = structure_with_openai(text)
50
  normalized = normalize_resume({
51
  "work_experience": structured.get("work_experience_raw", ""),
 
60
  "normalized": normalized,
61
  })
62
 
63
+ # 3) 統合
64
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
65
 
66
+ # 4) スキル抽出
67
  merged_text = "\n\n".join([r["text"] for r in partial_records])
68
  skills = extract_skills(merged_text, {
69
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
 
79
  # 6) 品質スコア
80
  score = compute_quality_score(merged_text, merged)
81
 
82
+ # 7) 要約
83
  summaries = summarize_with_openai(merged_text)
84
 
85
  # 8) 構造化出力
 
112
 
113
  return (
114
  json.dumps(result_json, ensure_ascii=False, indent=2),
115
+ json.dumps(skills, ensure_ascii=False, indent=2), # ← gr.Code に渡すため文字列化
116
  json.dumps(score, ensure_ascii=False, indent=2),
117
  summaries["300chars"],
118
  summaries["100chars"],
 
130
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
131
  file_count="multiple",
132
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
133
+ type="filepath",
134
  )
135
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
136
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
 
141
  out_json = gr.Code(label="統合出力 (JSON)")
142
 
143
  with gr.Tab("抽出スキル"):
144
+ out_skills = gr.Code(label="スキル一覧 (JSON)") # ← gr.JSON から gr.Code に変更
145
 
146
  with gr.Tab("品質スコア"):
147
  out_score = gr.Code(label="品質評価")
 
165
 
166
 
167
  if __name__ == "__main__":
168
+ # Spaces では localhost に直接アクセスできないケースがあるため share=True を明示(注: Gradio は Hugging Face Spaces 上では share=True を非対応として警告を出し無視する。ローカル実行時のみ共有リンクが作成される)
169
+ demo.launch(share=True)