Corin1998 committed on
Commit
44d16b3
·
verified ·
1 Parent(s): 63577a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -13
app.py CHANGED
@@ -27,23 +27,25 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
27
  partial_records = []
28
  raw_texts = []
29
 
30
- for f in files:
31
- raw_bytes = f.read()
32
- filetype = detect_filetype(f.name, raw_bytes)
 
 
 
 
33
 
34
  # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
35
  if filetype in {"pdf", "image"}:
36
- text = extract_text_with_openai(raw_bytes, filename=f.name, filetype=filetype)
37
  else:
38
  base_text = load_doc_text(filetype, raw_bytes)
39
- # 生テキストをそのままOpenAIへ渡し、軽く整形した全文を返す
40
- text = extract_text_with_openai(base_text.encode("utf-8"), filename=f.name, filetype="txt")
41
 
42
- raw_texts.append({"filename": f.name, "text": text})
43
 
44
- # 2) OpenAIでセクション構造化
45
  structured = structure_with_openai(text)
46
- # 念のためルールベース正規化も適用(期間抽出など補助)
47
  normalized = normalize_resume({
48
  "work_experience": structured.get("work_experience_raw", ""),
49
  "education": structured.get("education_raw", ""),
@@ -51,7 +53,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
51
  "skills": ", ".join(structured.get("skills_list", [])),
52
  })
53
  partial_records.append({
54
- "source": f.name,
55
  "text": text,
56
  "structured": structured,
57
  "normalized": normalized,
@@ -82,7 +84,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
82
  # 8) 構造化出力
83
  result_json = {
84
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
85
- "files": [f.name for f in files],
86
  "merged": merged,
87
  "skills": skills,
88
  "quality_score": score,
@@ -123,8 +125,12 @@ with gr.Blocks(title=APP_TITLE) as demo:
123
  gr.Markdown(f"# {APP_TITLE}\n複数ファイルを統合→OpenAIで読み込み/構造化/要約→匿名化→Datasets保存")
124
 
125
  with gr.Row():
126
- in_files = gr.Files(label="レジュメ類 (PDF/画像/Word/テキスト) 複数可", file_count="multiple",
127
- file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"], type="file")
 
 
 
 
128
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
129
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
130
 
 
27
  partial_records = []
28
  raw_texts = []
29
 
30
+ for path in files: # ← 'filepath' なので文字列パス
31
+ filepath = str(path)
32
+ filename = os.path.basename(filepath)
33
+ with open(filepath, "rb") as fp:
34
+ raw_bytes = fp.read()
35
+
36
+ filetype = detect_filetype(filename, raw_bytes)
37
 
38
  # 1) テキスト抽出:画像/PDFはOpenAI Vision OCR、docx/txtは生文面+OpenAI整形
39
  if filetype in {"pdf", "image"}:
40
+ text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
41
  else:
42
  base_text = load_doc_text(filetype, raw_bytes)
43
+ text = extract_text_with_openai(base_text.encode("utf-8"), filename=filename, filetype="txt")
 
44
 
45
+ raw_texts.append({"filename": filename, "text": text})
46
 
47
+ # 2) OpenAIでセクション構造化 → ルール正規化
48
  structured = structure_with_openai(text)
 
49
  normalized = normalize_resume({
50
  "work_experience": structured.get("work_experience_raw", ""),
51
  "education": structured.get("education_raw", ""),
 
53
  "skills": ", ".join(structured.get("skills_list", [])),
54
  })
55
  partial_records.append({
56
+ "source": filename,
57
  "text": text,
58
  "structured": structured,
59
  "normalized": normalized,
 
84
  # 8) 構造化出力
85
  result_json = {
86
  "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
87
+ "files": [os.path.basename(p) for p in files],
88
  "merged": merged,
89
  "skills": skills,
90
  "quality_score": score,
 
125
  gr.Markdown(f"# {APP_TITLE}\n複数ファイルを統合→OpenAIで読み込み/構造化/要約→匿名化→Datasets保存")
126
 
127
  with gr.Row():
128
+ in_files = gr.Files(
129
+ label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
130
+ file_count="multiple",
131
+ file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
132
+ type="filepath", # ← 修正点
133
+ )
134
  candidate_id = gr.Textbox(label="候補者ID(任意。未入力なら自動生成)")
135
  notes = gr.Textbox(label="補足メモ(任意)", lines=3)
136