Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,31 +2,37 @@ import gradio as gr
|
|
| 2 |
import fitz # PyMuPDF
|
| 3 |
import openai
|
| 4 |
import re
|
| 5 |
-
|
| 6 |
import os
|
|
|
|
|
|
|
| 7 |
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
| 8 |
|
| 9 |
-
# ---
|
| 10 |
def clean_text(text):
    """Normalize raw extracted text through three ordered regex passes.

    The passes run in this order, and the result is then stripped:
      1. Lines containing only digits (with optional surrounding
         whitespace) are emptied -- presumably stray page numbers.
      2. A newline followed by optional whitespace and more newlines is
         collapsed to exactly one blank line.
      3. Every run of spaces and/or tabs becomes a single space.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string without leading or trailing whitespace.
    """
    # (pattern, replacement, flags) applied sequentially; order matters.
    substitutions = (
        (r'^\s*\d+\s*$', '', re.MULTILINE),
        (r'\n\s*\n+', '\n\n', 0),
        (r'[ \t]+', ' ', 0),
    )
    cleaned = text
    for pattern, replacement, flags in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
    return cleaned.strip()
|
| 15 |
|
| 16 |
-
# --- 擷取 PDF 文字 ---
|
| 17 |
def extract_text_from_pdf(file):
|
| 18 |
doc = fitz.open(file.name)
|
| 19 |
-
|
| 20 |
for page in doc:
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
| 22 |
doc.close()
|
| 23 |
-
return clean_text(
|
| 24 |
|
| 25 |
-
# --- GPT
|
| 26 |
def analyze_resume(resume_text):
|
| 27 |
prompt = f"""
|
| 28 |
你是一位專業的履歷健檢與職涯輔導顧問,請協助我以下幾件事:
|
| 29 |
|
|
|
|
|
|
|
| 30 |
1. 根據下方履歷內容,幫我整理出:
|
| 31 |
- 學歷背景
|
| 32 |
- 技能與工具
|
|
@@ -61,10 +67,10 @@ def analyze_resume(resume_text):
|
|
| 61 |
content = response['choices'][0]['message']['content']
|
| 62 |
return content.replace("**", "").replace("* ", "• ").replace("*", "").strip()
|
| 63 |
|
| 64 |
-
# --- GPT
|
| 65 |
def generate_autobiography(resume_text):
|
| 66 |
prompt = f"""
|
| 67 |
-
請根據
|
| 68 |
|
| 69 |
履歷內容:
|
| 70 |
------------------------
|
|
@@ -84,10 +90,10 @@ def generate_autobiography(resume_text):
|
|
| 84 |
|
| 85 |
# --- Gradio UI ---
|
| 86 |
with gr.Blocks(title="求職小幫手 AI") as demo:
|
| 87 |
-
gr.Markdown("## 🧑💼 求職小幫手 AI\n
|
| 88 |
|
| 89 |
with gr.Row():
|
| 90 |
-
pdf_input = gr.File(label="📄 上傳履歷 PDF
|
| 91 |
extract_btn = gr.Button("⬇️ 擷取與清理內容")
|
| 92 |
|
| 93 |
resume_textbox = gr.Textbox(label="📄 擷取後的履歷純文字", lines=20, interactive=False)
|
|
|
|
| 2 |
import fitz # PyMuPDF
|
| 3 |
import openai
|
| 4 |
import re
|
|
|
|
| 5 |
import os
|
| 6 |
+
|
| 7 |
+
# ✅ 使用 Hugging Face Secrets 管理 API 金鑰
|
| 8 |
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
| 9 |
|
| 10 |
+
# --- 清理文字 ---
|
| 11 |
def clean_text(text):
    """Normalize raw extracted text through three ordered regex passes.

    The passes run in this order, and the result is then stripped:
      1. Lines containing only digits (with optional surrounding
         whitespace) are emptied -- presumably stray page numbers.
      2. A newline followed by optional whitespace and more newlines is
         collapsed to exactly one blank line.
      3. Every run of spaces and/or tabs becomes a single space.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string without leading or trailing whitespace.
    """
    # (pattern, replacement, flags) applied sequentially; order matters.
    substitutions = (
        (r'^\s*\d+\s*$', '', re.MULTILINE),
        (r'\n\s*\n+', '\n\n', 0),
        (r'[ \t]+', ' ', 0),
    )
    cleaned = text
    for pattern, replacement, flags in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
    return cleaned.strip()
|
| 16 |
|
| 17 |
+
# --- 改進版:區塊式擷取 PDF 文字 + 座標排序 ---
|
| 18 |
def extract_text_from_pdf(file):
    """Extract cleaned text from a PDF using block-level, position-sorted reads.

    Each page is read with ``page.get_text("blocks")``. The blocks of a page
    are ordered by their y coordinate first and x coordinate second, and the
    text field of every block is joined with newlines before being passed
    through ``clean_text``.

    Args:
        file: An object whose ``name`` attribute is the path of the PDF to
            open, or ``None`` when no file was supplied.

    Returns:
        The cleaned text of the whole document, or an empty string when
        ``file`` is ``None``.
    """
    # Nothing was supplied: return empty text instead of failing on file.name.
    if file is None:
        return ""

    text_blocks = []
    # The context manager closes the document even if extraction raises,
    # so the file handle is not leaked on a malformed page.
    with fitz.open(file.name) as doc:
        for page in doc:
            # Each block is a tuple; index 0 is x, index 1 is y, index 4 is
            # the block's text. Sort by y first, then by x.
            # NOTE(review): depending on PyMuPDF flags, non-text blocks may
            # also appear here -- confirm whether they should be filtered.
            ordered_blocks = sorted(
                page.get_text("blocks"),
                key=lambda b: (b[1], b[0]),
            )
            text_blocks.extend(b[4] for b in ordered_blocks)
    return clean_text("\n".join(text_blocks))
|
| 28 |
|
| 29 |
+
# --- GPT 履歷分析(STAR 法則)---
|
| 30 |
def analyze_resume(resume_text):
|
| 31 |
prompt = f"""
|
| 32 |
你是一位專業的履歷健檢與職涯輔導顧問,請協助我以下幾件事:
|
| 33 |
|
| 34 |
+
注意:這份履歷是由 PDF 擷取而來,可能因版面問題導致部分文字順序略有錯亂。請你盡量根據語意邏輯判斷內容。
|
| 35 |
+
|
| 36 |
1. 根據下方履歷內容,幫我整理出:
|
| 37 |
- 學歷背景
|
| 38 |
- 技能與工具
|
|
|
|
| 67 |
content = response['choices'][0]['message']['content']
|
| 68 |
return content.replace("**", "").replace("* ", "• ").replace("*", "").strip()
|
| 69 |
|
| 70 |
+
# --- GPT 自傳撰寫 ---
|
| 71 |
def generate_autobiography(resume_text):
|
| 72 |
prompt = f"""
|
| 73 |
+
這是從 PDF 擷取的履歷文字,順序可能略有錯亂。請盡量根據語意整理,幫我撰寫一段約 300 字的個人自傳。口吻自然、自信,強調個人特質、學習歷程與職涯目標,可作為履歷中的「自我介紹」使用:
|
| 74 |
|
| 75 |
履歷內容:
|
| 76 |
------------------------
|
|
|
|
| 90 |
|
| 91 |
# --- Gradio UI ---
|
| 92 |
with gr.Blocks(title="求職小幫手 AI") as demo:
|
| 93 |
+
gr.Markdown("## 🧑💼 求職小幫手 AI\n上傳履歷 PDF,我們幫你分析亮點並撰寫個人自傳!")
|
| 94 |
|
| 95 |
with gr.Row():
|
| 96 |
+
pdf_input = gr.File(label="📄 上傳履歷 PDF", file_types=[".pdf"])
|
| 97 |
extract_btn = gr.Button("⬇️ 擷取與清理內容")
|
| 98 |
|
| 99 |
resume_textbox = gr.Textbox(label="📄 擷取後的履歷純文字", lines=20, interactive=False)
|