Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,27 +1,47 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import PyPDF2
|
| 3 |
import io
|
|
|
|
| 4 |
import json
|
| 5 |
-
|
|
|
|
| 6 |
from huggingface_hub import login
|
|
|
|
| 7 |
|
| 8 |
# --- Configuration --- #
|
| 9 |
load_dotenv()
|
| 10 |
login(token=os.getenv("HF_TOKEN"))
|
| 11 |
|
| 12 |
def extract_text_from_pdf(pdf_file):
|
| 13 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
try:
|
| 15 |
-
if isinstance(pdf_file, bytes):
|
| 16 |
-
file_bytes = pdf_file
|
| 17 |
-
else:
|
| 18 |
-
raise ValueError("Invalid file format")
|
| 19 |
-
|
| 20 |
pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
except Exception as e:
|
| 24 |
-
raise Exception(f"
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def generate_ai_prompt(resume_text, job_desc=None):
|
| 27 |
"""Generates smart analysis prompt for AI"""
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import PyPDF2
|
| 3 |
import io
|
| 4 |
+
import re
|
| 5 |
import json
|
| 6 |
+
import os # Added missing import
|
| 7 |
+
import gc
|
| 8 |
from huggingface_hub import login
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
|
| 11 |
# --- Configuration --- #
# Read settings from a local .env file (when one exists) and then log in to
# the Hugging Face Hub with whatever HF_TOKEN the environment provides.
load_dotenv()
login(token=os.environ.get("HF_TOKEN"))
|
| 14 |
|
| 15 |
def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF, limited to the first 10,000 characters.

    Args:
        pdf_file: Either a filesystem path (``str``) pointing at the PDF or
            the raw PDF content as ``bytes``.

    Returns:
        The text of every page joined with newlines, truncated to at most
        10,000 characters.

    Raises:
        ValueError: If ``pdf_file`` is ``None``.
        TypeError: If ``pdf_file`` is neither ``str`` nor ``bytes``.
        Exception: With a ``"PDF read error: ..."`` message when PyPDF2
            cannot parse the data, or an ``"Extraction error: ..."`` message
            for any other failure inside the parsing step (including a PDF
            with no pages or no extractable text). The underlying error is
            attached as ``__cause__``.
    """
    if pdf_file is None:
        raise ValueError("No PDF file uploaded")

    # Handle both file path and bytes input
    if isinstance(pdf_file, str):
        with open(pdf_file, 'rb') as f:
            file_bytes = f.read()
    elif isinstance(pdf_file, bytes):
        file_bytes = pdf_file
    else:
        raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")

    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
        if len(pdf_reader.pages) == 0:
            raise ValueError("PDF has no pages")

        # Defensive: treat a falsy per-page result (None or "") as empty text
        # so that str.join cannot fail with a TypeError on such a page.
        text = "\n".join(
            (page.extract_text() or "") for page in pdf_reader.pages
        )
        if not text.strip():
            raise ValueError("No text extracted from PDF (possibly image-based or empty)")

        return text[:10000]  # Limit to first 10,000 characters
    except PyPDF2.errors.PdfReadError as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"PDF read error: {str(e)}") from e
    except Exception as e:
        # The ValueErrors raised above are deliberately routed through here,
        # so callers keep seeing the "Extraction error: ..." message format.
        raise Exception(f"Extraction error: {str(e)}") from e
    finally:
        # Kept from the original: force a collection after each extraction,
        # presumably to release parser memory promptly.
        gc.collect()
|
| 45 |
|
| 46 |
def generate_ai_prompt(resume_text, job_desc=None):
|
| 47 |
"""Generates smart analysis prompt for AI"""
|