Spaces:

Corin1998
/

HFResumeIntakeSystem

Runtime error

App Files Community

Corin1998 committed on Nov 23, 2025

Commit

44d16b3

verified ·

1 Parent(s): 63577a1

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -13

app.py CHANGED Viewed

@@ -27,23 +27,25 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
     partial_records = []
     raw_texts = []
-    for f in files:
-        raw_bytes = f.read()
-        filetype = detect_filetype(f.name, raw_bytes)
         # 1) テキスト抽出：画像/PDFはOpenAI Vision OCR、docx/txtは生文面＋OpenAI整形
         if filetype in {"pdf", "image"}:
-            text = extract_text_with_openai(raw_bytes, filename=f.name, filetype=filetype)
         else:
             base_text = load_doc_text(filetype, raw_bytes)
-            # 生テキストをそのままOpenAIへ渡し、軽く整形した全文を返す
-            text = extract_text_with_openai(base_text.encode("utf-8"), filename=f.name, filetype="txt")
-        raw_texts.append({"filename": f.name, "text": text})
-        # 2) OpenAIでセクション構造化
         structured = structure_with_openai(text)
-        # 念のためルールベース正規化も適用（期間抽出など補助）
         normalized = normalize_resume({
             "work_experience": structured.get("work_experience_raw", ""),
             "education": structured.get("education_raw", ""),
@@ -51,7 +53,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
             "skills": ", ".join(structured.get("skills_list", [])),
         })
         partial_records.append({
-            "source": f.name,
             "text": text,
             "structured": structured,
             "normalized": normalized,
@@ -82,7 +84,7 @@ def process_resumes(files, candidate_id: str, additional_notes: str = ""):
     # 8) 構造化出力
     result_json = {
         "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
-        "files": [f.name for f in files],
         "merged": merged,
         "skills": skills,
         "quality_score": score,
@@ -123,8 +125,12 @@ with gr.Blocks(title=APP_TITLE) as demo:
     gr.Markdown(f"# {APP_TITLE}\n複数ファイルを統合→OpenAIで読み込み/構造化/要約→匿名化→Datasets保存")
     with gr.Row():
-        in_files = gr.Files(label="レジュメ類 (PDF/画像/Word/テキスト) 複数可", file_count="multiple",
-                            file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"], type="file")
         candidate_id = gr.Textbox(label="候補者ID（任意。未入力なら自動生成）")
     notes = gr.Textbox(label="補足メモ（任意）", lines=3)

     partial_records = []
     raw_texts = []
+    for path in files:  # ← 'filepath' なので文字列パス
+        filepath = str(path)
+        filename = os.path.basename(filepath)
+        with open(filepath, "rb") as fp:
+            raw_bytes = fp.read()
+        filetype = detect_filetype(filename, raw_bytes)
         # 1) テキスト抽出：画像/PDFはOpenAI Vision OCR、docx/txtは生文面＋OpenAI整形
         if filetype in {"pdf", "image"}:
+            text = extract_text_with_openai(raw_bytes, filename=filename, filetype=filetype)
         else:
             base_text = load_doc_text(filetype, raw_bytes)
+            text = extract_text_with_openai(base_text.encode("utf-8"), filename=filename, filetype="txt")
+        raw_texts.append({"filename": filename, "text": text})
+        # 2) OpenAIでセクション構造化 → ルール正規化
         structured = structure_with_openai(text)
         normalized = normalize_resume({
             "work_experience": structured.get("work_experience_raw", ""),
             "education": structured.get("education_raw", ""),
             "skills": ", ".join(structured.get("skills_list", [])),
         })
         partial_records.append({
+            "source": filename,
             "text": text,
             "structured": structured,
             "normalized": normalized,
     # 8) 構造化出力
     result_json = {
         "candidate_id": candidate_id or hashlib.sha256(merged_text.encode("utf-8")).hexdigest()[:16],
+        "files": [os.path.basename(p) for p in files],
         "merged": merged,
         "skills": skills,
         "quality_score": score,
     gr.Markdown(f"# {APP_TITLE}\n複数ファイルを統合→OpenAIで読み込み/構造化/要約→匿名化→Datasets保存")
     with gr.Row():
+        in_files = gr.Files(
+            label="レジュメ類 (PDF/画像/Word/テキスト) 複数可",
+            file_count="multiple",
+            file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".docx", ".txt"],
+            type="filepath",  # ← 修正点
+        )
         candidate_id = gr.Textbox(label="候補者ID（任意。未入力なら自動生成）")
     notes = gr.Textbox(label="補足メモ（任意）", lines=3)