Spaces:
Build error
Build error
| import gradio as gr | |
| import pickle | |
| import docx | |
| import PyPDF2 | |
| import re | |
# Load pre-trained model, TF-IDF vectorizer and label encoder
def _load_pickle(path):
    """Unpickle and return the object stored at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes, including when unpickling raises.

    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in
    the file. Only load artifacts that you produced yourself or fully trust.
    """
    with open(path, 'rb') as artifact:
        return pickle.load(artifact)


svc_model = _load_pickle('clf.pkl')  # Update with your model path
tfidf = _load_pickle('tfidf.pkl')  # Update with your vectorizer path
le = _load_pickle('encoder.pkl')  # Update with your encoder path
# Function to clean resume text
# Substitutions are applied in this order; every match becomes one space.
# Raw strings are used so that `\S` / `\s` are not parsed as (invalid) string
# escape sequences, which newer Python versions warn about.
_RESUME_CLEANUP_PATTERNS = (
    re.compile(r'http\S+\s'),  # "http..." token together with the whitespace after it
    re.compile(r'RT|cc'),  # literal "RT" / "cc" anywhere, including inside words
    re.compile(r'#\S+\s'),  # "#tag" token together with the whitespace after it
    re.compile(r'@\S+'),  # "@mention" token
    re.compile('[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""")),  # ASCII punctuation
    re.compile(r'[^\x00-\x7f]'),  # any non-ASCII character
    re.compile(r'\s+'),  # collapse whitespace runs last
)


def clean_resume(txt):
    """Return *txt* with URLs, tags, mentions, punctuation and non-ASCII removed.

    Each pattern in ``_RESUME_CLEANUP_PATTERNS`` is applied in turn and every
    match is replaced by a single space; the final pattern collapses runs of
    whitespace. Leading/trailing whitespace is reduced to one space, not
    stripped.

    NOTE(review): the ``RT|cc`` pattern also matches inside words (for example
    "account" becomes "a ount"). It is kept unchanged on purpose: the TF-IDF
    vectorizer was presumably fitted on text cleaned this way, so changing the
    cleaning would change the features the model sees. Confirm before fixing.

    Args:
        txt: Raw resume text.

    Returns:
        The cleaned text as a string.
    """
    clean_text = txt
    for pattern in _RESUME_CLEANUP_PATTERNS:
        clean_text = pattern.sub(' ', clean_text)
    return clean_text
# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, one page per line block.

    Pages are joined with a newline so that the last word of one page does not
    fuse with the first word of the next (the DOCX extractor likewise
    newline-separates its paragraphs).

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts as its source.

    Returns:
        The concatenated page text as a string.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # `or ''` keeps the join safe if a page yields no text (empty or None).
    return '\n'.join(page.extract_text() or '' for page in pdf_reader.pages)
# Function to extract text from DOCX
def extract_text_from_docx(file):
    """Return the text of all paragraphs in a DOCX document.

    Each paragraph's text is followed by a newline, so the result ends with a
    newline whenever the document has at least one paragraph.

    Args:
        file: Whatever ``docx.Document`` accepts as its source.

    Returns:
        The paragraph texts concatenated into one string.
    """
    document = docx.Document(file)
    return ''.join(para.text + '\n' for para in document.paragraphs)
# Function to extract text from TXT
def extract_text_from_txt(file):
    """Return the contents of a plain-text upload as a string.

    The content is read exactly once and then decoded as UTF-8, falling back
    to Latin-1 when the bytes are not valid UTF-8. Reading once matters:
    calling ``read()`` a second time after a failed decode returns nothing
    because the stream is already at end-of-file, which silently produced an
    empty string for non-UTF-8 files.

    Args:
        file: A binary file-like object with ``read()``, or a filesystem path.

    Returns:
        The decoded text. Content that is already ``str`` (a text-mode
        stream) is returned unchanged.
    """
    if hasattr(file, 'read'):
        raw = file.read()
    else:
        # Not a stream: treat it as a path and read the bytes ourselves.
        with open(file, 'rb') as handle:
            raw = handle.read()

    if isinstance(raw, str):
        return raw

    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # Latin-1 maps every byte to a character, so this cannot fail.
        return raw.decode('latin-1')
# Function to handle file upload and extraction
def handle_file_upload(uploaded_file):
    """Extract the text of an uploaded resume based on its file extension.

    Args:
        uploaded_file: The upload to read. Either an object exposing the file
            name as ``.name`` (e.g. a temporary-file wrapper) or a plain path
            string. NOTE(review): which of the two Gradio passes depends on
            the installed version and the ``gr.File`` ``type`` setting.

    Returns:
        The extracted text as a string.

    Raises:
        ValueError: If nothing was uploaded, or the extension is not one of
            ``pdf``, ``docx`` or ``txt``.
    """
    if uploaded_file is None:
        # Submitting without a file; report it clearly instead of failing
        # with an AttributeError on `.name`.
        raise ValueError("No file uploaded. Please upload a PDF, DOCX, or TXT file.")

    # Objects without `.name` (plain path strings) are used as the name itself.
    file_name = str(getattr(uploaded_file, 'name', uploaded_file))
    file_extension = file_name.split('.')[-1].lower()

    if file_extension == 'pdf':
        return extract_text_from_pdf(uploaded_file)
    if file_extension == 'docx':
        return extract_text_from_docx(uploaded_file)
    if file_extension == 'txt':
        return extract_text_from_txt(uploaded_file)
    raise ValueError("Unsupported file type. Please upload a PDF, DOCX, or TXT file.")
# Function to predict the category of a resume
def predict_category(file):
    """Classify an uploaded resume and return a message for the UI.

    The upload is converted to text, cleaned, vectorized with the module-level
    ``tfidf`` object, classified with ``svc_model`` and mapped back to a
    category name through ``le``.

    This function never raises: any exception from any step is caught and
    reported as an ``"Error: ..."`` string so it can be shown in the output
    textbox.

    Args:
        file: The uploaded file as delivered by the Gradio ``File`` input.

    Returns:
        ``"Predicted Category: <name>"`` on success, otherwise
        ``"Error: <message>"``.
    """
    try:
        raw_text = handle_file_upload(file)
        # The model is fed a dense array, so the transform output is
        # densified with toarray() before predicting.
        features = tfidf.transform([clean_resume(raw_text)]).toarray()
        label_ids = svc_model.predict(features)
        label_names = le.inverse_transform(label_ids)
        return f"Predicted Category: {label_names[0]}"
    except Exception as exc:
        return f"Error: {str(exc)}"
# Define Gradio interface: one file input, one text output, wired to
# predict_category.
inputs = gr.File(label="Upload Resume (PDF, DOCX, TXT)")
outputs = gr.Textbox(label="Prediction")
interface = gr.Interface(
    fn=predict_category,
    inputs=inputs,
    outputs=outputs,
    title="Resume Classifier",
    description="Upload your resume to predict its job category using an AI model.",
)

# Launch the interface only when run as a script, not when imported.
# NOTE(review): share=True requests a public share link per Gradio's launch()
# documentation; confirm that is wanted in the environment this runs in.
if __name__ == "__main__":
    interface.launch(share=True)