import pdfplumber
import docx
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Function to extract text from PDF using pdfplumber
def extract_text_from_pdf(pdf_file):
    with pdfplumber.open(pdf_file) as pdf:
        text = ''
        for page in pdf.pages:
            # extract_text() returns None for pages with no extractable text,
            # so guard against it before concatenating
            page_text = page.extract_text()
            if page_text:
                text += page_text + '\n'
    return text

# Function to extract text from DOCX
def extract_text_from_docx(docx_file):
    doc = docx.Document(docx_file)
    full_text = []
    for paragraph in doc.paragraphs:
        full_text.append(paragraph.text)
    return '\n'.join(full_text)

# Function to generate roast based on resume text
def generate_roast(resume_text):
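    # Note: the tokenizer and model are reloaded on every call, which is slow;
    # for repeated use they could be loaded once at module level instead.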
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")

    # Define the prompt
    prompt_text = "Roast this resume:\n\n"
    
    # Tokenize the prompt
    prompt_tokenized = tokenizer(prompt_text, return_tensors="pt")
    prompt_tokens = prompt_tokenized['input_ids'].shape[1]
    
    # Calculate remaining tokens for the resume text: GPT-Neo's context window
    # is 2048 tokens, which must also cover the "\n\nRoast:" suffix appended
    # below and the 50 tokens we generate
    suffix_tokens = tokenizer("\n\nRoast:", return_tensors="pt")['input_ids'].shape[1]
    max_resume_tokens = 2048 - prompt_tokens - suffix_tokens - 50
    
    # Tokenize and truncate resume text
    resume_tokenized = tokenizer(resume_text, truncation=True, max_length=max_resume_tokens, return_tensors="pt")
    
    # Decode the truncated resume text back into a string
    truncated_resume_text = tokenizer.decode(resume_tokenized['input_ids'][0], skip_special_tokens=True)
    
    # Combine prompt and truncated resume text
    final_prompt = f"{prompt_text}{truncated_resume_text}\n\nRoast:"
    
    # Generate roast; return_full_text=False makes the pipeline return only
    # the generated continuation instead of echoing the whole prompt back
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
    roast = generator(final_prompt, max_new_tokens=50, num_return_sequences=1, return_full_text=False)
    
    return roast[0]['generated_text'].strip()
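
# Hypothetical quick check: roast a short, made-up resume snippet
# print(generate_roast("Jane Doe\nProfessional Buzzword Curator, 2015-present"))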

# Function to handle file uploads and extract text from resume files
def roast_resume(file=None, resume_text=None):
    if file:
        # Handle file uploads for PDF or DOCX
        if file.name.lower().endswith('.pdf'):
            resume_text = extract_text_from_pdf(file)
        elif file.name.lower().endswith('.docx'):
            resume_text = extract_text_from_docx(file)
        else:
            return "Unsupported file format. Please upload a PDF or DOCX file."
    elif resume_text and resume_text.strip():
        # Use pasted resume text
        pass
    else:
        return "No resume provided."
    
    # Generate the roast based on extracted or pasted resume text
    roast = generate_roast(resume_text)
    return roast

# Gradio interface with file upload or text input options
interface = gr.Interface(
    fn=roast_resume, 
    inputs=[gr.File(label="Upload Resume (PDF/DOCX)"), gr.Textbox(label="Or Paste Your Resume")], 
    outputs="text", 
    title="Resume Roaster",
    description="Upload your resume in PDF/DOCX format or paste your resume text, and let the AI roast it!"
)

# Launch Gradio app
interface.launch()
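
# launch() serves the app locally by default; a shareable public link can be
# created with interface.launch(share=True) instead.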