dindizz committed
Commit b1dd7a1 · verified · 1 Parent(s): 186b258

Update app.py

Files changed (1): app.py +10 -4
app.py CHANGED
@@ -23,15 +23,20 @@ def extract_text_from_docx(docx_file):
 def generate_roast(resume_text):
     tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
     model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
+
+    # Define the prompt separately
+    prompt_text = "Roast this resume:\n\n"
 
-    # Tokenize the resume text and truncate it to 2048 tokens
-    inputs = tokenizer(resume_text, return_tensors="pt", truncation=True, max_length=2048)
+    # Tokenize the prompt and resume text, truncating to fit within the 2048 token limit
+    max_tokens = 2048 - len(tokenizer(prompt_text)['input_ids'])  # Reserve space for the prompt
+    inputs = tokenizer(resume_text, return_tensors="pt", truncation=True, max_length=max_tokens)
 
     # Convert the tokenized inputs back to text for the prompt
     truncated_resume_text = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
 
-    prompt = f"Roast this resume:\n\n{truncated_resume_text}\n\nRoast:"
-
+    # Create the final prompt by combining the prompt and the truncated resume
+    prompt = f"{prompt_text}{truncated_resume_text}\n\nRoast:"
+
     generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
 
     # Generate roast
@@ -41,6 +46,7 @@ def generate_roast(resume_text):
 
 
 
+
 # Gradio interface function
 def roast_resume(file):
     if file.name.endswith('.pdf'):
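
For context, the change replaces a fixed max_length=2048 truncation with a budget that first subtracts the tokens consumed by the prompt itself, so that prompt plus resume together fit GPT-Neo's 2048-token context. The sketch below shows the same pattern in isolation; it assumes the EleutherAI/gpt-neo-1.3B tokenizer from the diff, and resume_text is a hypothetical placeholder rather than anything from the original app.

# Minimal sketch of the truncation budget introduced in this commit.
# The dummy resume_text is illustrative only.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

prompt_text = "Roast this resume:\n\n"
resume_text = "Jane Doe\nObjective: deliver synergy at scale.\n" * 400  # deliberately too long

# Reserve space for the fixed prompt, then truncate only the resume text
max_tokens = 2048 - len(tokenizer(prompt_text)["input_ids"])
inputs = tokenizer(resume_text, return_tensors="pt", truncation=True, max_length=max_tokens)
truncated_resume_text = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)

prompt = f"{prompt_text}{truncated_resume_text}\n\nRoast:"
print(len(tokenizer(prompt)["input_ids"]))  # roughly 2048, plus the few tokens in "\n\nRoast:"

One caveat of the committed version: the trailing "\n\nRoast:" suffix is not included in the budget, so the final prompt can still exceed 2048 tokens by a few; subtracting the token length of that suffix as well would make the bound exact.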