Spaces:
Sleeping
Sleeping
| import openai | |
| import gradio as gr | |
| import PyPDF2 | |
| import docx | |
| from docx import Document | |
| import os | |
| from docx import Document | |
| from docx.shared import Pt | |
| from docx.enum.text import WD_UNDERLINE | |
| from docx.enum.text import WD_ALIGN_PARAGRAPH | |
| openai.api_key = "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" | |
| messages = [] | |
| latest_resume_text = "" | |
| desired_position = "" | |
| default_text_1 = "请帮我总结我上传的简历" | |
| default_text_2 = "请帮我把我上传的简历进行脱敏处理。" | |
| default_text_3 = "请帮我阅读候选人简历并总结出以下几个部分。第一个板块是“个人信息”,请总结出此后选人的姓名,性别,工作经验(多少年),最高学历,毕业院校,专业,和毕业时间。第二个部分是此候选人的本人评价,放进一个自然段里。第三个部分是具体得工作经历。第四个部分是做过的项目经验。请根据你对于这位候选人简历的最细致的阅读排出以上几个部分。每个部分由自己的标题:五个标题为 个人信息,本人评价,工作经历,和项目经验。请把每一个小项写得细致一点,并且用数字排序!" | |
| def set_text_1(): | |
| return default_text_1 | |
| def set_text_2(): | |
| return default_text_2 | |
| def set_text_3(): | |
| return default_text_3 | |
| # # extracting text from pdf | |
| # def extract_text_from_pdf(file): | |
| # reader = PyPDF2.PdfReader(file) | |
| # text = "" | |
| # for page in reader.pages: | |
| # text += page.extract_text() | |
| # return text | |
| #write a function that extract text from a pdf that could contain multiple pages | |
| def extract_text_from_pdf(file): | |
| reader = PyPDF2.PdfReader(file) | |
| text = "" | |
| for page in reader.pages: | |
| text += page.extract_text() | |
| return text | |
| # extracting text from doc | |
| def extract_text_from_docx(file): | |
| doc = docx.Document(file) | |
| all_text = [] | |
| for para in doc.paragraphs: | |
| # Extract the text from the current paragraph | |
| paragraph_text = para.text | |
| all_text.append(paragraph_text) | |
| combined_text = " ".join(all_text) | |
| return combined_text | |
| def handle_file_upload(uploaded_files): | |
| combined_text = "" | |
| if uploaded_files is None: | |
| return "" | |
| for uploaded_file in uploaded_files: | |
| file_type = uploaded_file.name.split('.')[-1].lower() | |
| if file_type == 'pdf': | |
| combined_text += extract_text_from_pdf(uploaded_file) | |
| elif file_type in ['docx', 'doc']: | |
| combined_text += extract_text_from_docx(uploaded_file) | |
| else: | |
| combined_text += "Unsupported file format, please upload a PDF or Word document.\n" | |
| return combined_text | |
| def clear_inputs(): | |
| messages = [] | |
| return "", None | |
| def log_conversation(input, reply): | |
| with open("log.csv", "a") as log_file: | |
| log_file.write(f"user_input: {input},\n\n Chatgpt: {reply}\n\n") | |
| import re | |
| from docx import Document | |
| from docx.enum.text import WD_ALIGN_PARAGRAPH | |
| from docx.shared import RGBColor | |
| from docx.enum.text import WD_PARAGRAPH_ALIGNMENT | |
| def is_chinese_char(char): | |
| """Check if a given character is a Chinese character.""" | |
| return '\u4e00' <= char <= '\u9fff' | |
| def count_chinese_chars(text): | |
| """Count the number of Chinese characters in a string.""" | |
| return sum(is_chinese_char(char) for char in text) | |
| def generate_resume_document(latest_resume_text): | |
| # Define keywords that indicate the start of a new section | |
| section_keywords = ["个人信息", "本人评价", "工作经历", "项目经验"] | |
| # Create a new Document | |
| doc = Document() | |
| # Add a title to the document | |
| heading = doc.add_heading('候选人简历', level=0) | |
| run = heading.add_run() | |
| run.underline = WD_UNDERLINE.SINGLE | |
| # Center-align the heading | |
| paragraph_format = heading.paragraph_format | |
| paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER | |
| # Check if the text is empty | |
| if not latest_resume_text.strip(): | |
| doc.add_paragraph("No resume text found") | |
| return None | |
| # Remove non-word characters from the text except for spaces and new lines | |
| # Remove non-word characters from the text except for "-", ",", "." and numbers | |
| cleaned_text = re.sub(r'[^\w\s\-\,\.\d]', '', latest_resume_text) | |
| # Split the resume text into lines | |
| lines = cleaned_text.split('\n') | |
| # Variables to store the current section title and its content | |
| current_section = None | |
| current_content = [] | |
| # Function to add a section to the document with bullet points | |
| def add_section_to_doc(section, content): | |
| global name | |
| if section and content: | |
| sec = doc.add_heading(section, level=0) | |
| for run in sec.runs: | |
| run.font.size = Pt(14) | |
| if section == "个人信息": | |
| # Combine all content lines into a single line separated by a semicolon | |
| combined_content = ';'.join(content) | |
| p = doc.add_paragraph() # Create a new paragraph for combined content | |
| last_index = 0 # Keep track of the last index processed | |
| for keyword in ["姓名", "性别", "工作经验", "最高学历", "毕业院校", "专业", "毕业时间"]: | |
| if keyword in combined_content: | |
| start_index = combined_content.index(keyword, last_index) | |
| # Add text before the keyword as a normal run | |
| p.add_run(combined_content[last_index:start_index]) | |
| # Add the keyword and the colon as a bold run | |
| bold_run = p.add_run(keyword + ':') | |
| bold_run.bold = True | |
| # Update the last index processed | |
| last_index = start_index + len(keyword) | |
| if keyword == "姓名": | |
| name_start_index = last_index | |
| try: | |
| # Try to find the start index of the next keyword "性别" | |
| name_end_index = combined_content.index("性别", name_start_index) | |
| # Store the content into name variable | |
| name = combined_content[name_start_index:name_end_index] | |
| # Remove any non-Chinese characters | |
| name = re.sub(r'[^\u4e00-\u9fff]', '', name) | |
| except ValueError: | |
| # Handle the case where "性别" is not found | |
| name = "需手动填写" | |
| # Add any remaining text after the last keyword | |
| p.add_run(combined_content[last_index:]) | |
| else: | |
| for line in content: | |
| # # Count Chinese characters in the line | |
| # chinese_char_count = count_chinese_chars(line) | |
| # if chinese_char_count < 5: | |
| # doc.add_heading(line, level=0) | |
| # p = doc.add_paragraph() | |
| # p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER | |
| # for run in sec.runs: | |
| # run.font.size = Pt(14) | |
| # else: | |
| doc.add_paragraph(line) | |
| for line in lines: | |
| # Check for section titles (considering case-insensitivity) | |
| if any(line.strip().upper() == keyword for keyword in section_keywords): | |
| # Add the previous section to the document before starting a new one | |
| add_section_to_doc(current_section, current_content) | |
| current_section = line.strip().title() # Title Case for headings | |
| current_content = [] | |
| else: | |
| # Clean line and add to current content | |
| clean_line = line.strip() | |
| if clean_line: # Ignore empty lines | |
| # Append clean_line to current_content, removing any additional internal line breaks | |
| current_content.append(re.sub(r'\s+', ' ', clean_line)) | |
| # Add the last section to the document | |
| add_section_to_doc(current_section, current_content) | |
| # Save the document | |
| word_filename = "博网科技-" + name + "-" + desired_position + ".docx" | |
| doc.save(word_filename) | |
| return word_filename | |
| # create a function that automatically convert a docx file to a pdf file | |
| def convert_to_pdf(word_filename): | |
| pdf_filename = word_filename.replace(".docx", ".pdf") | |
| doc = docx.Document(word_filename) | |
| doc.save(pdf_filename) | |
| return pdf_filename | |
| def CustomChatGPT(user_input, uploaded_file): | |
| global messages, latest_resume_text, desired_position, name # Declare latest_resume_text as global if it's used globally | |
| resume_text = "" | |
| resume_text = handle_file_upload(uploaded_file) | |
| print("Resume text from the file:", resume_text) | |
| if resume_text: | |
| messages.append({"role": "system", "content": resume_text}) | |
| # if the resume text contains "期望职位", extract the desired position and store it in desired_position | |
| if "期望职位" in resume_text: | |
| match = re.search(r'期望职位:(.+?)\s', resume_text) | |
| if match: | |
| desired_position = match.group(1) | |
| # Remove non-Chinese characters | |
| desired_position = re.sub(r'[^\u4e00-\u9fff]', '', desired_position) | |
| else: | |
| desired_position = "需手动填写" | |
| messages.append({"role": "user", "content": user_input}) | |
| response = openai.ChatCompletion.create( | |
| model="gpt-3.5-turbo", | |
| messages=messages | |
| ) | |
| ChatGPT_reply = response.choices[0].message.content | |
| messages.append({"role": "assistant", "content": ChatGPT_reply}) | |
| combined_input = user_input + " " + resume_text | |
| log_conversation(combined_input, ChatGPT_reply) | |
| latest_resume_text = ChatGPT_reply | |
| if "请帮我阅读候选人简历" in user_input: | |
| word_filename = generate_resume_document(latest_resume_text) | |
| pdf_filename = convert_to_pdf(word_filename) | |
| ChatGPT_reply = "Resume generated successfully. Please download the file using the link below." | |
| messages = [] | |
| else: | |
| doc = Document() | |
| doc.add_paragraph(ChatGPT_reply) | |
| word_filename = "generated_response.docx" | |
| doc.save(word_filename) | |
| # reset the messages to start a new conversation | |
| messages = [] | |
| return ChatGPT_reply, word_filename | |
| def get_log_file(): | |
| return "log.csv" | |
| def read_log_file(): | |
| if os.path.exists("log.csv"): | |
| with open("log.csv", "r") as file: | |
| return file.read() | |
| else: | |
| return "Log file is empty or doesn't exist." | |
| def clear_log_file(): | |
| if not os.path.exists("log.csv"): | |
| return "file not exist" | |
| else: | |
| with open("log.csv", "w") as log_file: | |
| log_file.write("") | |
| return "Log cleared" | |
| def main(): | |
| custom_css = """ | |
| <style> | |
| .custom-button .mdc-button { | |
| background-color: #90EE90; /* Light Green */ | |
| color: black; | |
| border: none; | |
| border-radius: 6px; | |
| padding: 10px 20px; | |
| margin: 8px 0; | |
| cursor: pointer; | |
| font-size: 14px; | |
| text-align: center; | |
| } | |
| .custom-button .mdc-button:hover { | |
| background-color: #76b476; /* Darker shade for hover effect */ | |
| } | |
| </style> | |
| """ | |
| with gr.Blocks(css=custom_css) as demo: | |
| gr.Markdown("# 简历分析系统", elem_classes=["centered-title"]) | |
| with gr.Row(): | |
| with gr.Column(): | |
| text_input = gr.Textbox() | |
| file_input = gr.File(file_count="multiple", label="Upload Resume") | |
| with gr.Row(): | |
| btn1 = gr.Button("总结简历", elem_classes=["custom-button"]) | |
| btn2 = gr.Button("脱敏处理", elem_classes=["custom-button"]) | |
| btn3 = gr.Button("简历生成", elem_classes=["custom-button"]) | |
| btn1.click(fn=set_text_1, inputs=[], outputs=text_input) | |
| btn2.click(fn=set_text_2, inputs=[], outputs=text_input) | |
| btn3.click(fn=set_text_3, inputs=[], outputs=text_input) | |
| with gr.Row(): | |
| submit_btn = gr.Button("提交", elem_classes=["custom-button"]) | |
| clear_btn = gr.Button("清楚", elem_classes=["custom-button"]) | |
| clear_btn.click(fn=clear_inputs, inputs=[], outputs=[]) | |
| with gr.Column(): | |
| with gr.Row(): | |
| output_text = gr.Textbox(label="ChatGPT回复", interactive=True, lines=1) | |
| with gr.Row(): | |
| output_word = gr.File(label="下载word文件") | |
| # output_pdf = gr.File(label="Download PDF File") | |
| submit_btn.click(fn=CustomChatGPT, inputs=[text_input, file_input], outputs=[output_text, output_word]) | |
| with gr.Row(): | |
| log_text = gr.Textbox(label="Log Content", interactive=True, lines=1) | |
| with gr.Row(): | |
| view_log_button = gr.Button("对话记录", elem_classes=["custom-button"]) | |
| download_log_button = gr.Button("下载对话记录", elem_classes=["custom-button"]) | |
| clear_log_button = gr.Button("清楚对话记录", elem_classes=["custom-button"]) | |
| view_log_button.click(fn=read_log_file, inputs=[], outputs=log_text) | |
| download_log_button.click(fn=get_log_file, inputs=[], outputs=[]) | |
| clear_log_button.click(fn=clear_log_file, inputs=[], outputs=[]) | |
| demo.launch() | |
| if __name__ == "__main__": | |
| main() |