# Resume Roaster — Hugging Face Space application script.
| import pdfplumber | |
| import docx | |
| from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM | |
| import gradio as gr | |
# Function to extract text from PDF using pdfplumber
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page of a PDF.

    Parameters
    ----------
    pdf_file : str or file-like
        Path or binary stream accepted by ``pdfplumber.open``.

    Returns
    -------
    str
        All page texts concatenated in page order. Pages with no
        extractable text contribute nothing.
    """
    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for pages without a text layer
            # (e.g. scanned images); the original `text += ...` would
            # raise TypeError on such pages.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    # ''.join avoids the quadratic cost of repeated string +=.
    return ''.join(page_texts)
# Function to extract text from DOCX
def extract_text_from_docx(docx_file):
    """Return the text of every paragraph in a DOCX file, newline-joined.

    Parameters
    ----------
    docx_file : str or file-like
        Path or binary stream accepted by ``docx.Document``.
    """
    document = docx.Document(docx_file)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
# Function to generate roast based on resume text
def generate_roast(resume_text):
    """Generate a short comedic "roast" of the given resume text.

    Parameters
    ----------
    resume_text : str
        The resume body; it is truncated to fit the model's context.

    Returns
    -------
    str
        The full generated text (prompt plus continuation), as returned
        by the transformers text-generation pipeline.
    """
    # Loading a 1.3B-parameter model is expensive; do it once and cache
    # the pipeline on the function object instead of on every request.
    if not hasattr(generate_roast, "_generator"):
        tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
        model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
        generate_roast._tokenizer = tokenizer
        generate_roast._generator = pipeline(
            'text-generation', model=model, tokenizer=tokenizer
        )
    tokenizer = generate_roast._tokenizer
    generator = generate_roast._generator

    # Define the prompt and the trailing marker appended after the resume.
    prompt_text = "Roast this resume:\n\n"
    suffix_text = "\n\nRoast:"

    # Count the tokens the fixed parts of the prompt consume.
    prompt_tokens = tokenizer(prompt_text, return_tensors="pt")['input_ids'].shape[1]
    suffix_tokens = tokenizer(suffix_text, return_tensors="pt")['input_ids'].shape[1]

    # GPT-Neo's context window is 2048 tokens. Reserve room for the
    # prompt, the trailing "Roast:" marker, AND the 50 generated tokens;
    # the original budget ignored the last two and could overflow the
    # context on long resumes.
    max_resume_tokens = 2048 - prompt_tokens - suffix_tokens - 50

    # Tokenize and truncate resume text to the remaining budget.
    resume_tokenized = tokenizer(
        resume_text,
        truncation=True,
        max_length=max_resume_tokens,
        return_tensors="pt",
    )
    # Decode the truncated resume text back into a string.
    truncated_resume_text = tokenizer.decode(
        resume_tokenized['input_ids'][0], skip_special_tokens=True
    )

    # Combine prompt, truncated resume text, and marker, then generate.
    final_prompt = f"{prompt_text}{truncated_resume_text}{suffix_text}"
    roast = generator(final_prompt, max_new_tokens=50, num_return_sequences=1)
    return roast[0]['generated_text']
# Function to handle file uploads and extract text from resume files
def roast_resume(file=None, resume_text=None):
    """Resolve the resume text from an upload or pasted text, then roast it.

    Parameters
    ----------
    file : object with a ``name`` attribute, optional
        Uploaded resume file (PDF or DOCX). Takes precedence over
        ``resume_text`` when both are supplied.
    resume_text : str, optional
        Pasted resume text, used when no file is given.

    Returns
    -------
    str
        The generated roast, or a human-readable error message when the
        input is missing or the file format is unsupported.
    """
    if file:
        # Handle file uploads for PDF or DOCX. Compare against the
        # lowercased name so '.PDF' / '.Docx' uploads are accepted too
        # (the original check was case-sensitive).
        file_name = file.name.lower()
        if file_name.endswith('.pdf'):
            resume_text = extract_text_from_pdf(file)
        elif file_name.endswith('.docx'):
            resume_text = extract_text_from_docx(file)
        else:
            return "Unsupported file format. Please upload a PDF or DOCX file."
    elif not resume_text:
        # Neither an upload nor pasted text was provided.
        return "No resume provided."
    # Generate the roast based on extracted or pasted resume text
    return generate_roast(resume_text)
# Gradio interface: users may either upload a resume file or paste raw text.
resume_inputs = [
    gr.File(label="Upload Resume (PDF/DOCX)"),
    gr.Textbox(label="Or Paste Your Resume"),
]
interface = gr.Interface(
    fn=roast_resume,
    inputs=resume_inputs,
    outputs="text",
    title="Resume Roaster",
    description="Upload your resume in PDF/DOCX format or paste your resume text, and let the AI roast it!",
)
# Launch Gradio app
interface.launch()