Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import fitz # PyMuPDF | |
| from sklearn.metrics.pairwise import cosine_similarity, cosine_distances | |
| import numpy as np | |
def extract_text_from_pdf(pdf_content):
    """Return the text of every page of a PDF given as raw bytes.

    The bytes are written to a temporary file, opened with PyMuPDF
    (``fitz``), and the text of each page is concatenated in page order.
    Non-breaking spaces (``"\\xa0"``) are removed from the result.

    Args:
        pdf_content: Bytes-like content of the PDF file.

    Returns:
        The concatenated page text with ``"\\xa0"`` characters stripped.

    Raises:
        Whatever ``fitz.open`` / ``page.get_text`` raise for unreadable
        input; the temporary file is removed in that case as well.
    """
    # delete=False so the file can be closed (and therefore flushed) before
    # PyMuPDF re-opens it by path; we delete it ourselves in ``finally``.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_path = temp_file.name
    try:
        with temp_file:
            temp_file.write(pdf_content)

        pdf_document = fitz.open(temp_path)
        try:
            text = "".join(page.get_text() for page in pdf_document)
        finally:
            # Close explicitly so the file handle is released before removal.
            pdf_document.close()
    finally:
        # Always clean up, even when opening or parsing the PDF fails.
        os.remove(temp_path)

    return text.replace("\xa0", "")
def get_most_similar_job(data, cv_vect, df_vect):
    """Rank the rows of ``df_vect`` by cosine similarity to ``cv_vect[0]``.

    Args:
        data: Unused by the computation; kept so existing callers that pass
            it positionally keep working.
        cv_vect: Vectorised CV(s); only the first entry, ``cv_vect[0]``, is
            compared. NOTE(review): presumably a 2-D (e.g. sparse TF-IDF)
            matrix so that ``cv_vect[0]`` is still 2-D as
            ``cosine_similarity`` requires - confirm against the caller.
        df_vect: Vectorised candidates to rank against the CV.

    Returns:
        A NumPy array of indices into ``df_vect``, ordered from the most
        similar entry to the least similar one.
    """
    # The original looped over ``range(len([data]))``, which always yields
    # exactly one iteration with index 0; that single step is written out
    # directly here.
    similarities = cosine_similarity(cv_vect[0], df_vect).flatten()
    # argsort is ascending, so reverse it to put the best match first.
    indices = np.argsort(similarities)[::-1]
    return indices