Corin1998 committed on
Commit
865007c
·
verified ·
1 Parent(s): b6b2d84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -27,7 +27,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
27
  partial_records = []
28
  raw_texts = []
29
 
30
- # files は 'filepath'(文字列パス)
31
  for path in files:
32
  filepath = str(path)
33
  filename = os.path.basename(filepath)
@@ -36,16 +36,17 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
36
 
37
  filetype = detect_filetype(filename, raw_bytes)
38
 
39
- # 1) テキスト抽出
40
  if filetype in {"pdf", "image"}:
41
  text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
42
- else:
43
  base_text = load_doc_text(filetype, raw_bytes)
 
44
  text = extract_text_with_openai(base_text.encode("utf-8"), filename=filename, filetype="txt")
45
 
46
  raw_texts.append({"filename": filename, "text": text})
47
 
48
- # 2) 構造化正規化
49
  structured = structure_with_openai(text)
50
  normalized = normalize_resume({
51
  "work_experience": structured.get("work_experience_raw", ""),
@@ -60,10 +61,10 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
60
  "normalized": normalized,
61
  })
62
 
63
- # 3) 統合
64
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
65
 
66
- # 4) スキル抽出
67
  merged_text = "\n\n".join([r["text"] for r in partial_records])
68
  skills = extract_skills(merged_text, {
69
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
@@ -79,7 +80,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
79
  # 6) 品質スコア
80
  score = compute_quality_score(merged_text, merged)
81
 
82
- # 7) 要約
83
  summaries = summarize_with_openai(merged_text)
84
 
85
  # 8) 構造化出力
@@ -94,7 +95,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
94
  "notes": additional_notes,
95
  }
96
 
97
- # 9) HF Datasets 保存
98
  dataset_repo = os.environ.get("DATASET_REPO")
99
  commit_info = None
100
  if dataset_repo:
@@ -112,7 +113,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
112
 
113
  return (
114
  json.dumps(result_json, ensure_ascii=False, indent=2),
115
- json.dumps(skills, ensure_ascii=False, indent=2), # Codeに渡すため文字列に
116
  json.dumps(score, ensure_ascii=False, indent=2),
117
  summaries["300chars"],
118
  summaries["100chars"],
@@ -130,7 +131,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
130
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
131
  file_count="multiple",
132
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
133
- type="filepath", # 重要:'file' ではなく 'filepath'
134
  )
135
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
136
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
@@ -141,7 +142,7 @@ with gr.Blocks(title=APP_TITLE) as demo:
141
  out_json = gr.Code(label="統合出力 (JSON)")
142
 
143
  with gr.Tab("抽出スキル"):
144
- out_skills = gr.Code(label="スキル一覧 (JSON)") # gr.JSON から変更
145
 
146
  with gr.Tab("品質スコア"):
147
  out_score = gr.Code(label="品質評価")
@@ -165,4 +166,5 @@ with gr.Blocks(title=APP_TITLE) as demo:
165
 
166
 
167
  if __name__ == "__main__":
168
- demo.launch(share=True) # ← 必須
 
 
27
  partial_records = []
28
  raw_texts = []
29
 
30
+ # files は 'filepath' 前提(str パスが来る)
31
  for path in files:
32
  filepath = str(path)
33
  filename = os.path.basename(filepath)
 
36
 
37
  filetype = detect_filetype(filename, raw_bytes)
38
 
39
+ # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
40
  if filetype in {"pdf", "image"}:
41
  text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
42
+ else:
43
  base_text = load_doc_text(filetype, raw_bytes)
44
+ # 生テキストをOpenAIへ渡し、整形済み本文を取得
45
  text = extract_text_with_openai(base_text.encode("utf-8"), filename=filename, filetype="txt")
46
 
47
  raw_texts.append({"filename": filename, "text": text})
48
 
49
+ # 2) OpenAIでセクション構造化ルール正規化
50
  structured = structure_with_openai(text)
51
  normalized = normalize_resume({
52
  "work_experience": structured.get("work_experience_raw", ""),
 
61
  "normalized": normalized,
62
  })
63
 
64
+ # 3) 統合(複数ファイル→1候補者)
65
  merged = merge_normalized_records([r["normalized"] for r in partial_records])
66
 
67
+ # 4) スキル抽出(辞書/正規表現)
68
  merged_text = "\n\n".join([r["text"] for r in partial_records])
69
  skills = extract_skills(merged_text, {
70
  "work_experience": merged.get("raw_sections", {}).get("work_experience", ""),
 
80
  # 6) 品質スコア
81
  score = compute_quality_score(merged_text, merged)
82
 
83
+ # 7) 要約(300/100/1文)
84
  summaries = summarize_with_openai(merged_text)
85
 
86
  # 8) 構造化出力
 
95
  "notes": additional_notes,
96
  }
97
 
98
+ # 9) HF Datasets 保存(任意)
99
  dataset_repo = os.environ.get("DATASET_REPO")
100
  commit_info = None
101
  if dataset_repo:
 
113
 
114
  return (
115
  json.dumps(result_json, ensure_ascii=False, indent=2),
116
+ json.dumps(skills, ensure_ascii=False, indent=2), # gr.Code に渡すため文字列化
117
  json.dumps(score, ensure_ascii=False, indent=2),
118
  summaries["300chars"],
119
  summaries["100chars"],
 
131
  label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
132
  file_count="multiple",
133
  file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
134
+ type="filepath", # Gradio 4.44系は 'filepath' or 'binary'
135
  )
136
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
137
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
 
142
  out_json = gr.Code(label="統合出力 (JSON)")
143
 
144
  with gr.Tab("抽出スキル"):
145
+ out_skills = gr.Code(label="スキル一覧 (JSON)") # gr.JSON は 4.44系でスキーマ例外が出ることがあるため回避
146
 
147
  with gr.Tab("品質スコア"):
148
  out_score = gr.Code(label="品質評価")
 
166
 
167
 
168
  if __name__ == "__main__":
169
+ # Spaces 環境では共有URL必須になる場合があるため share=True を明示
170
+ demo.launch(share=True)