Spaces:
Runtime error
Runtime error
File size: 1,749 Bytes
715cf18 496da0f 0259ef3 715cf18 0259ef3 496da0f 0259ef3 38b54e1 0259ef3 38b54e1 0259ef3 38b54e1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import gradio as gr
# from PyMuPDF import fitz
import pdfplumber
import os
import re
from joblib import load
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, joined in page order.

    Args:
        file_path: Value handed straight to ``pdfplumber.open`` (the PDF to
            read).

    Returns:
        One string containing the extracted text of all pages, with a
        newline between consecutive pages. Pages that yield no text
        contribute an empty string.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can yield None for a page without a text layer
        # (e.g. a scanned image) in some pdfplumber versions; `or ""` keeps
        # such pages from raising TypeError during concatenation.
        # Joining with "\n" stops the last word of one page from fusing
        # with the first word of the next.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)
def preprocess_text(text):
    """Clean raw resume text before it is vectorized.

    Steps, in order: drop URLs, drop the tokens ``RT``/``cc``, drop
    hashtags, drop @-mentions, replace ASCII punctuation and non-ASCII
    characters with spaces, then collapse every whitespace run to a single
    space.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text. Leading/trailing single spaces are not stripped.
    """
    # URLs: the previous pattern 'https\+S\s*' only matched the literal
    # text "https+S", so links were never removed.
    resume_text = re.sub(r'http\S+\s*', '', text)
    # NOTE(review): this also matches inside words ("account" -> "aount").
    # Left unchanged because the trained vectorizer presumably saw the same
    # cleaning -- confirm before tightening to word boundaries.
    resume_text = re.sub(r'RT|cc', '', resume_text)
    resume_text = re.sub(r'#\S+', '', resume_text)
    # Mentions: the previous pattern '@\+S' only matched the literal "@+S".
    resume_text = re.sub(r'@\S+', '', resume_text)
    resume_text = re.sub(
        '[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""),
        ' ',
        resume_text,
    )
    resume_text = re.sub(r'[^\x00-\x7f]', r' ', resume_text)
    resume_text = re.sub(r'\s+', ' ', resume_text)
    return resume_text
# Holds the deserialized classifier after the first load_model() call.
_MODEL_CACHE = {}


def load_model():
    """Return the classifier stored in ``Resume_Screening.joblib``.

    The file is deserialized on the first call only; later calls return the
    same object, so a prediction request no longer re-reads the model from
    disk each time.

    Returns:
        The object loaded from ``Resume_Screening.joblib``.
    """
    if "model" not in _MODEL_CACHE:
        _MODEL_CACHE["model"] = load('Resume_Screening.joblib')
    return _MODEL_CACHE["model"]
# Vectorizer object deserialized once, at import time, from a joblib file
# given by relative path; predict() calls its transform() on the cleaned text.
vectorizer = load('Word_Vec_Resume.joblib')
# Category names looked up by position: predict() uses each value returned by
# the model as an index into this list.
# NOTE(review): the order presumably mirrors the label encoding used when the
# model was trained -- confirm before adding, removing or reordering entries.
actual_labels = [
"Advocate", "Arts", "Automation Testing", "Blockchain", "Business Analyst",
"Civil Engineer", "Data Science", "Database", "DevOps Engineer", "DotNet Developer",
"ETL Developer", "Electrical Engineering", "HR", "Hadoop", "Health and fitness",
"Java Developer", "Mechanical Engineer", "Network Security Engineer", "Operations Manager",
"PMO", "Python Developer", "SAP Developer", "Sales", "Testing", "Web Designing"
]
def predict(file_path):
    """Classify an uploaded PDF resume into one of ``actual_labels``.

    Args:
        file_path: The upload supplied by the Gradio ``"file"`` input. This
            may be a path string, an object exposing the path as ``.name``
            (which of the two depends on the Gradio version -- verify), or
            ``None`` when nothing was uploaded.

    Returns:
        The predicted category name, or a short prompt asking for a file
        when no upload was provided.
    """
    if file_path is None:
        # Nothing uploaded: answer with a hint instead of crashing inside
        # the PDF reader.
        return "Please upload a PDF resume."
    # Accept both a plain path and a file wrapper carrying its path in .name.
    pdf_path = getattr(file_path, "name", file_path)
    text = preprocess_text(extract_text_from_pdf(pdf_path))
    features = vectorizer.transform([text])
    # A single document is submitted, so only the first prediction matters.
    label_index = load_model().predict(features)[0]
    return actual_labels[label_index]
# Web UI: one file-upload input wired to predict(); the returned label is
# displayed as plain text.
iface = gr.Interface(
    fn=predict,
    inputs="file",
    outputs="text",
)
# The stray trailing "|" after this call was a syntax error and is removed.
iface.launch()