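"""Resume Roaster: a Gradio app that extracts text from an uploaded PDF/DOCX
resume (or pasted resume text) and asks GPT-Neo 1.3B to roast it."""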
import pdfplumber
import docx
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# Function to extract text from PDF using pdfplumber
def extract_text_from_pdf(pdf_file):
    with pdfplumber.open(pdf_file) as pdf:
        text = ''
        for page in pdf.pages:
            # extract_text() can return None for pages with no extractable text
            text += page.extract_text() or ''
    return text
# Function to extract text from DOCX
def extract_text_from_docx(docx_file):
    doc = docx.Document(docx_file)
    full_text = []
    for paragraph in doc.paragraphs:
        full_text.append(paragraph.text)
    return '\n'.join(full_text)
# Function to generate roast based on resume text
def generate_roast(resume_text):
    # Load the tokenizer and model (note: this reloads them on every call)
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
    # Define the prompt
    prompt_text = "Roast this resume:\n\n"
    # Tokenize the prompt
    prompt_tokenized = tokenizer(prompt_text, return_tensors="pt")
    prompt_tokens = prompt_tokenized['input_ids'].shape[1]
    # Reserve room for the prompt, the "\n\nRoast:" suffix, and the 50 generated
    # tokens so the final sequence fits GPT-Neo's 2048-token context window
    max_resume_tokens = 2048 - prompt_tokens - 50 - 10
    # Tokenize and truncate the resume text
    resume_tokenized = tokenizer(resume_text, truncation=True, max_length=max_resume_tokens, return_tensors="pt")
    # Decode the truncated resume text back into a string
    truncated_resume_text = tokenizer.decode(resume_tokenized['input_ids'][0], skip_special_tokens=True)
    # Combine the prompt and the truncated resume text
    final_prompt = f"{prompt_text}{truncated_resume_text}\n\nRoast:"
    # Generate the roast
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
    roast = generator(final_prompt, max_new_tokens=50, num_return_sequences=1)
    return roast[0]['generated_text']
# Function to handle file uploads and extract text from resume files
def roast_resume(file=None, resume_text=None):
    if file:
        # gr.File may pass a file object (older Gradio) or a filepath string (newer Gradio)
        file_path = file if isinstance(file, str) else file.name
        # Handle file uploads for PDF or DOCX
        if file_path.lower().endswith('.pdf'):
            resume_text = extract_text_from_pdf(file_path)
        elif file_path.lower().endswith('.docx'):
            resume_text = extract_text_from_docx(file_path)
        else:
            return "Unsupported file format. Please upload a PDF or DOCX file."
    elif resume_text:
        # Use the pasted resume text as-is
        pass
    else:
        return "No resume provided."
    # Generate the roast from the extracted or pasted resume text
    roast = generate_roast(resume_text)
    return roast
# Gradio interface with file upload or text input options
interface = gr.Interface(
    fn=roast_resume,
    inputs=[gr.File(label="Upload Resume (PDF/DOCX)"), gr.Textbox(label="Or Paste Your Resume")],
    outputs="text",
    title="Resume Roaster",
    description="Upload your resume in PDF/DOCX format or paste your resume text, and let the AI roast it!"
)
# Launch Gradio app
interface.launch()