Spaces:
Build error
Build error
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import pandas as pd | |
| import gradio as gr | |
| import pdfplumber | |
| import texthero as hero | |
| from texthero import preprocessing as ppe | |
| import re | |
# Sentence-embedding model, loaded once at import time and reused by
# sent_similarity() for every request.
model = SentenceTransformer('sentence-transformers/paraphrase-xlm-r-multilingual-v1')
def remove_special_characters(text):
    """Replace every character that is not an ASCII letter with a space.

    Digits, punctuation, whitespace and non-ASCII characters are each
    substituted one-for-one, so the result has the same length as *text*.
    """
    return re.sub(r'[^a-zA-Z]', ' ', text)
# Text file (job description)
def opentxt(filepath):
    """Read a plain-text job description and return its cleaned text.

    The file is read leniently (undecodable bytes are dropped), newlines are
    flattened to spaces, ``www...`` tokens are removed, every non-letter
    character is replaced by a space via ``remove_special_characters``, and
    the result is run through the texthero pipeline
    ``[fillna, remove_urls, remove_whitespace]``.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A one-element pandas Series of ``str`` holding the cleaned text.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(filepath, errors="ignore") as handle:
        raw_text = handle.read()
    raw_text = raw_text.replace('\n', ' ')
    # Raw string avoids the invalid "\S" escape; the original alternation
    # listed the same pattern twice, so a single copy matches identically.
    raw_text = re.sub(r'www.\S+', '', raw_text)
    df_1 = pd.DataFrame([raw_text], columns=['text'])
    df_1['text'] = df_1['text'].apply(remove_special_characters)
    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
    df_1['cleaned_text'] = hero.clean(df_1['text'], custom_pipeline)
    return df_1['cleaned_text'].astype(str)
# PDF file (resume)
def pdftotext(filepath):
    """Extract and clean the text of the first page of a PDF resume.

    Only the first page is read. Its text has newlines flattened to spaces,
    ``www...`` tokens removed, every non-letter character replaced by a
    space via ``remove_special_characters``, and is then run through the
    texthero pipeline ``[fillna, remove_urls, remove_whitespace]``.

    Args:
        filepath: Path of the PDF file to read.

    Returns:
        A one-element pandas Series of ``str`` holding the cleaned text
        (an empty string when the PDF has no pages or no extractable text).
    """
    with pdfplumber.open(filepath) as pdf:
        pages = pdf.pages
        # A PDF with no pages would raise IndexError on pages[0].
        page_text = (
            pages[0].extract_text(x_tolerance=3, y_tolerance=3)
            if pages else None
        )
    # extract_text may give None for a page without a text layer (e.g. a
    # scanned image); treat that as empty text instead of failing on .replace.
    page_text = (page_text or '').replace('\n', ' ')
    # Raw string avoids the invalid "\S" escape; the original alternation
    # listed the same pattern twice, so a single copy matches identically.
    page_text = re.sub(r'www.\S+', '', page_text)
    df = pd.DataFrame([page_text], columns=['text'])
    df['text'] = df['text'].apply(remove_special_characters)
    custom_pipeline = [ppe.fillna, ppe.remove_urls, ppe.remove_whitespace]
    df['cleaned_text'] = hero.clean(df['text'], custom_pipeline)
    return df['cleaned_text'].astype(str)
def sent_similarity(filepath_1, filepath_2):
    """Score how similar a resume is to a job description, as a percentage.

    Args:
        filepath_1: Uploaded resume; its ``.name`` is passed to ``pdftotext``.
        filepath_2: Uploaded job description; its ``.name`` is passed to
            ``opentxt``.

    Returns:
        The cosine similarity between the two texts' sentence embeddings,
        multiplied by 100 and rounded to two decimal places.
    """
    resume_text = ''.join(pdftotext(filepath_1.name))
    job_text = ''.join(opentxt(filepath_2.name))

    embeddings = model.encode([resume_text, job_text])
    # cosine_similarity expects 2-D inputs, so each vector becomes one row.
    resume_vec = embeddings[0].reshape(1, -1)
    job_vec = embeddings[1].reshape(1, -1)

    score = cosine_similarity(resume_vec, job_vec)[0][0]
    return round(score * 100, 2)
# Gradio UI: two single-file uploads (resume PDF, job-description text file)
# feeding sent_similarity, with the score shown in a label component.
input_1 = gr.inputs.File(
    file_count="single",
    type="file",
    label='Upload the Resume (.pdf)',
    optional=False,
)
input_2 = gr.inputs.File(
    file_count="single",
    type="file",
    label='Upload the Job Description (.txt)',
    optional=False,
)

title = "Resume Screener"
description = "Upload your resume(.pdf) and the job description(.txt) and let the sentence similarity model display the similarity percentage !!!"

iface = gr.Interface(
    fn=sent_similarity,
    inputs=[input_1, input_2],
    outputs="label",
    title=title,
    description=description,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()