Spaces:

Aashish13
/

Resume_Screening_with_Python_NLP

Build error

App Files Files Community

Resume_Screening_with_Python_NLP / app.py

Aashish13

Update app.py (#2)

b271f2c verified about 1 year ago

raw

history blame

2.98 kB

	import gradio as gr
	import pickle
	import docx
	import PyPDF2
	import re

	# Load pre-trained model, TF-IDF vectorizer and encoder.
	# NOTE: pickle.load() can execute arbitrary code while deserializing, so
	# these artifacts must come from a trusted source.
	# Each file is opened in a `with` block so its handle is closed as soon as
	# the object has been loaded instead of being left open for the process
	# lifetime.
	with open('clf.pkl', 'rb') as model_file:  # Update with your model path
	    svc_model = pickle.load(model_file)
	with open('tfidf.pkl', 'rb') as vectorizer_file:  # Update with your vectorizer path
	    tfidf = pickle.load(vectorizer_file)
	with open('encoder.pkl', 'rb') as encoder_file:  # Update with your encoder path
	    le = pickle.load(encoder_file)

	# Function to clean resume text
	def clean_resume(txt):
	    """Return *txt* with URLs, hashtags, mentions and punctuation blanked out.

	    Each pattern below is replaced by a single space, in order:

	    1. ``http...`` runs that are followed by a whitespace character
	    2. the literal text ``RT|cc``
	    3. ``#...`` runs that are followed by a whitespace character
	    4. ``@...`` runs
	    5. every ASCII punctuation character
	    6. every non-ASCII character
	    7. runs of whitespace (collapsed to one space)

	    Leading/trailing spaces are not stripped.

	    Args:
	        txt: The raw text to clean.

	    Returns:
	        The cleaned text as a string.
	    """
	    # All patterns are raw strings: the previous non-raw literals contained
	    # invalid escape sequences (\S, \s, \|), which emit a SyntaxWarning on
	    # Python 3.12+. The compiled regexes are unchanged.
	    patterns = (
	        r'http\S+\s',
	        # NOTE(review): the escaped pipe makes this match the literal text
	        # "RT|cc", not "RT" or "cc". Kept as-is so cleaning stays identical
	        # to before -- confirm against the preprocessing used at training time.
	        r'RT\|cc',
	        r'#\S+\s',
	        r'@\S+',
	        # Same character set as before, written without stray backslashes.
	        '[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""),
	        r'[^\x00-\x7f]',
	        r'\s+',
	    )
	    clean_text = txt
	    for pattern in patterns:
	        clean_text = re.sub(pattern, ' ', clean_text)
	    return clean_text

	# Function to extract text from PDF
	def extract_text_from_pdf(file):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        file: Whatever ``PyPDF2.PdfReader`` accepts as its source
	            (passed through unchanged).

	    Returns:
	        A single string made of each page's extracted text, with no
	        separator inserted between pages.
	    """
	    pdf_reader = PyPDF2.PdfReader(file)
	    # `or ''` keeps a page that yields no text (a falsy result) from raising
	    # a TypeError; join avoids rebuilding the string once per page.
	    return ''.join(page.extract_text() or '' for page in pdf_reader.pages)

	# Function to extract text from DOCX
	def extract_text_from_docx(file):
	    """Return the text of a DOCX document, one paragraph per line.

	    Args:
	        file: Source handed straight to ``docx.Document``.

	    Returns:
	        The text of each entry in ``doc.paragraphs``, each followed by a
	        newline character.
	    """
	    document = docx.Document(file)
	    return ''.join(para.text + '\n' for para in document.paragraphs)

	# Function to extract text from TXT
	def extract_text_from_txt(file):
	    """Return the contents of a plain-text file as a string.

	    The bytes are decoded as UTF-8; if that fails they are decoded as
	    Latin-1, which accepts any byte sequence.

	    Args:
	        file: Either an object with a ``read()`` method or a filesystem
	            path that can be passed to ``open()``.

	    Returns:
	        The decoded text.
	    """
	    if hasattr(file, 'read'):
	        raw = file.read()
	    else:
	        # No read() method: treat the argument as a path and read its bytes.
	        with open(file, 'rb') as handle:
	            raw = handle.read()
	    # A text-mode stream already yields str; there is nothing to decode.
	    if isinstance(raw, str):
	        return raw
	    # Read once and decode the same bytes twice if needed. Calling read()
	    # again in the fallback would return b'' because the stream is already
	    # at EOF, silently producing an empty string for non-UTF-8 files.
	    try:
	        return raw.decode('utf-8')
	    except UnicodeDecodeError:
	        return raw.decode('latin-1')

	# Function to handle file upload and extraction
	def handle_file_upload(uploaded_file):
	    """Extract text from an uploaded resume, choosing the parser by extension.

	    The extension is whatever follows the last ``.`` in the file name,
	    compared case-insensitively.

	    Args:
	        uploaded_file: The uploaded file. Its ``name`` attribute is used to
	            find the extension; a value without ``name`` (e.g. a plain path
	            string) is itself used as the file name.

	    Returns:
	        The text returned by the matching ``extract_text_from_*`` function.

	    Raises:
	        ValueError: If nothing was uploaded or the extension is not
	            ``pdf``, ``docx`` or ``txt``.
	    """
	    if uploaded_file is None:
	        # Without this guard the caller sees an opaque AttributeError.
	        raise ValueError("No file uploaded. Please upload a PDF, DOCX, or TXT file.")
	    file_name = str(getattr(uploaded_file, 'name', uploaded_file))
	    file_extension = file_name.split('.')[-1].lower()
	    if file_extension == 'pdf':
	        return extract_text_from_pdf(uploaded_file)
	    if file_extension == 'docx':
	        return extract_text_from_docx(uploaded_file)
	    if file_extension == 'txt':
	        return extract_text_from_txt(uploaded_file)
	    raise ValueError("Unsupported file type. Please upload a PDF, DOCX, or TXT file.")

	# Function to predict the category of a resume
	def predict_category(file):
	    """Predict the category of an uploaded resume and describe it as text.

	    The file's text is extracted, cleaned with ``clean_resume``, vectorized
	    with the module-level ``tfidf``, classified with ``svc_model`` and
	    mapped back to a name with ``le.inverse_transform``.

	    Args:
	        file: The uploaded file, as passed in by the Gradio interface.

	    Returns:
	        ``"Predicted Category: <name>"`` on success, otherwise a string
	        starting with ``"Error: "``. This function never raises: it is the
	        boundary with the UI, so every failure is reported as text.
	    """
	    try:
	        resume_text = handle_file_upload(file)
	        cleaned_text = clean_resume(resume_text)
	        # With no text at all (e.g. an image-only PDF) the model would still
	        # return some category for an empty document; report that instead.
	        if not cleaned_text.strip():
	            return "Error: No readable text could be extracted from the uploaded file."
	        vectorized_text = tfidf.transform([cleaned_text])
	        vectorized_text = vectorized_text.toarray()
	        predicted_category = svc_model.predict(vectorized_text)
	        predicted_category_name = le.inverse_transform(predicted_category)
	        return f"Predicted Category: {predicted_category_name[0]}"
	    except Exception as e:
	        return f"Error: {str(e)}"

	# Build the Gradio UI: one file-upload input wired to one text output.
	inputs = gr.File(label="Upload Resume (PDF, DOCX, TXT)")
	outputs = gr.Textbox(label="Prediction")

	interface = gr.Interface(
	    fn=predict_category,
	    inputs=inputs,
	    outputs=outputs,
	    title="Resume Classifier",
	    description="Upload your resume to predict its job category using an AI model.",
	)

	# Start the app only when this file is executed directly.
	if __name__ == "__main__":
	    interface.launch(share=True)