Devasy Patel committed
Add logging configuration and update similarity score display (#245)

* Add logging configuration and update similarity score display
* Refactor import statements and clean up code formatting
- Demo/DemoData.py +31 -4
- archive/Data.py +5 -8
- archive/ExtraScripts.py +25 -17
- archive/run.py +7 -7
- archive/streamlit_app.py +74 -45
- run_first.py +22 -22
- scripts/Extractor.py +66 -52
- scripts/JobDescriptionProcessor.py +13 -10
- scripts/KeytermsExtraction.py +39 -14
- scripts/LinkedinJobToPDF.py +15 -11
- scripts/ReadPdf.py +6 -5
- scripts/ResumeProcessor.py +9 -8
- scripts/TextCleaner.py +5 -5
- scripts/__init__.py +1 -1
- scripts/parsers/ParseJobDescToJson.py +8 -11
- scripts/parsers/ParseResumeToJson.py +11 -14
- scripts/parsers/__init__.py +1 -1
- scripts/similarity/__init__.py +1 -1
- scripts/similarity/get_score.py +81 -30
- scripts/similarity/get_similarity_score.py +133 -46
- scripts/utils/ReadFiles.py +6 -1
- scripts/utils/Similar.py +2 -2
- scripts/utils/Utils.py +10 -9
- scripts/utils/__init__.py +1 -1
- scripts/utils/logger.py +135 -31
- streamlit_app.py +159 -75
- streamlit_interactive.py +232 -110
- streamlit_second.py +274 -131
Demo/DemoData.py
CHANGED
@@ -1,6 +1,33 @@
jobs = [
{
"job_desc": "Job Description Product Manager 10+ Years of Experience\nTech Solutions San Francisco CA USA\nAbout Us\nTech Solutions is a global leader in the technology industry specializing in the development of cuttingedge\nsoftware products We’re currently looking for a seasoned Product Manager with over 10 years of experience\nto join our dynamic team\nJob Description\nThe Product Manager will be responsible for guiding the success of a product and leading the crossfunctional\nteam that is responsible for improving it This is an important organizational role that sets the strategy\nroadmap and feature definition for a product or product line\nResponsibilities\n•Define the product strategy and roadmap\n•Deliver MRDs and PRDs with prioritized features and corresponding justification\n•Work with external third parties to assess partnerships and licensing opportunities\n•Run beta and pilot programs with earlystage products and samples\n•Be an expert with respect to the competition\n•Act as a leader within the company\n•Develop the core positioning and messaging for the product\n•Perform product demos to customers\n•Set pricing to meet revenue and profitability goals\nRequirements\n•10+ years of experience in product management\n•Demonstrated success defining and launching excellent products\n•Excellent written and verbal communication skills\n•Technical background with experience in software development\n•Excellent teamwork skills\n•Proven ability to influence crossfunctional teams without formal authority\n•Must be able to travel 20\n•Bachelor’s degree MBA preferred\n1 Benefits\n•Competitive salary package\n•Health dental and vision insurance\n•Retirement savings plan\n•Professional development opportunities\n•Flexible work hours\nTech Solutions is an equal opportunity employer We celebrate diversity and are committed to creating\nan inclusive environment for all employees\nHow to Apply\nTo apply please submit your resume and a brief explanation of your relevant experience to \n2"
},
{
"job_desc": "Job Description Senior Full Stack Engineer 5+ Years of\nExperience\nTech Solutions San Francisco CA USA\nAbout Us\nTech Solutions is a leading technology company that creates innovative solutions across a variety of industries\nOur mission is to improve lives through advanced technology We’re currently seeking a Senior Full Stack\nEngineer to join our dynamic team\nJob Description\nWe’re looking for a Senior Full Stack Engineer with 5+ years of experience in developing web applications\nThe successful candidate will have experience working with both frontend and backend technologies and\nwill be capable of overseeing projects from conception to production deployment\nResponsibilities\n•Developing front end website architecture\n•Designing user interactions on web pages\n•Developing back end website applications\n•Creating servers and databases for functionality\n•Ensuring crossplatform optimization for mobile phones\n•Seeing through a project from conception to finished product\n•Designing and developing APIs\n•Meeting both technical and consumer needs\n•Staying abreast of developments in web applications and programming languages\nRequirements\n•Degree in Computer Science or similar field\n•5+ years of experience in web development\n•Strong organizational and project management skills\n•Proficiency with fundamental front end languages such as HTML CSS and JavaScript\n•Proficiency with serverside languages such as Python Ruby Java PHP and Net\n•Familiarity with database technology such as MySQL Oracle and MongoDB\n1 •Excellent verbal communication skills\n•Good problemsolving skills\n•Attention to detail\nBenefits\n•Competitive salary package\n•Health dental and vision insurance\n•Retirement savings plan\n•Professional development opportunities\n•Flexible work hours\nTech Solutions is an equal opportunity employer and we value diversity at our company\nHow to Apply\nTo apply please submit your resume and a brief explanation of your relevant experience to \n2"
},
{
"job_desc": "Job Description Front End Engineer 2 Years of Experience\nTech Solutions San Francisco CA USA\nAbout Us\nAt Tech Solutions we are on a mission to build products that solve complex problems and improve people’s\nlives We are seeking a talented Front End Engineer to join our dynamic team in San Francisco\nJob Description\nWe are looking for a Front End Engineer with at least 2 years of experience in developing scalable and\nuserfriendly web applications The successful candidate will be proficient in modern JavaScript frameworks\nand libraries HTML CSS and responsive design principles This role will contribute significantly to the\ncreation and implementation of user interfaces for our web applications\nResponsibilities\n•Develop new userfacing features using modern JavaScript frameworks like Reactjs Vuejs or Angu\nlarjs\n•Build reusable code and libraries for future use\n•Ensure the technical feasibility of UI/UX designs\n•Optimize application for maximum speed and scalability\n•Assure that all user input is validated before submitting to backend services\n•Collaborate with other team members and stakeholders\nRequirements\n•2 years of experience as a Front End Developer or similar role\n•Proficiency in web markup including HTML5 CSS3\n•Knowledge of modern JavaScript programming and experience with libraries like jQuery\n•Familiarity with modern frontend build pipelines and tools\n•Experience with popular frontend frameworks such as React Vue or Angular\n•Familiarity with code versioning tools such as Git\n•Degree in Computer Science Engineering or a related field\n1 Benefits\n•Competitive salary package\n•Health dental and vision insurance\n•Retirement savings plan\n•Professional development opportunities\n•Flexible work hours\nTech Solutions is proud to be an equal opportunity employer We celebrate diversity and are committed\nto creating an inclusive environment for all employees\nHow to Apply\nTo apply please submit your resume and a brief explanation of your relevant experience to \n2"
},
{
"job_desc": "Job Description Java Developer 3 Years of Experience\nTech Solutions San Francisco CA USA\nAbout Us\nAt Tech Solutions we believe in the power of technology to solve complex problems We are a dynamic\nforwardthinking tech company specializing in custom software solutions for various industries We are\nseeking a talented and experienced Java Developer to join our team\nJob Description\nWe are seeking a skilled Java Developer with at least 3 years of experience in building highperforming scal\nable enterprisegrade applications You will be part of a talented software team that works on missioncritical\napplications Your roles and responsibilities will include managing Java/Java EE application development\nwhile providing expertise in the full software development lifecycle\nResponsibilities\n•Designing implementing and maintaining Java applications that are often highvolume and low\nlatency required for missioncritical systems\n•Delivering high availability and performance\n•Contributing to all phases of the development lifecycle\n•Writing welldesigned efficient and testable code\n•Conducting software analysis programming testing and debugging\n•Ensuring designs comply with specifications\n•Preparing and producing releases of software components\n•Supporting continuous improvement by investigating alternatives and technologies and presenting these\nfor architectural review\nRequirements\n•BS/MS degree in Computer Science Engineering or a related subject\n•Proven handson Software Development experience\n•Proven working experience in Java development\n•Handson experience in designing and developing applications using Java EE platforms\n•ObjectOriented Analysis and design using common design patterns\n•Profound insight of Java and JEE internals Classloading Memory Management Transaction man\nagement etc\n1 •Excellent knowledge of Relational Databases SQL and ORM technologies JPA2 Hibernate\n•Experience in developing web applications using at least one popular web framework JSF Wicket\nGWT Spring MVC\n•Experience with testdriven development\nBenefits\n•Competitive salary package\n•Health dental and vision insurance\n•Retirement savings plan\n•Professional development opportunities\n•Flexible work hours\nTech Solutions is proud to be an equal opportunity employer We celebrate diversity and are committed\nto creating an inclusive environment for all employees\nHow to Apply\nTo apply please submit your resume and a brief explanation of your relevant experience to \n2"
},
]

resumes = [
{
"resume": "JOHN DOE\n123 Main St Anywhere USA \nLinkedIn linkedincom/in/johndoe GitHub githubcom/johndoe\nPROFESSIONAL SUMMARY\nHighly skilled Full Stack Developer with over 5 years of experience in Java and Angular development\nspecializing in designing building testing and maintaining web applications Proficient in an assortment\nof technologies including Java Spring Boot Angular HTML5 CSS3 and SQL Exceptional ability to\nwork in a team and selfdirect Committed to providing highquality results with little supervision\nSKILLS\n•Java and J2EE\n•Spring Boot Spring MVC and Hibernate\n•Angular versions 2+\n•JavaScript TypeScript HTML5 CSS3 and Bootstrap\n•RESTful APIs\n•SQL and NoSQL databases MySQL MongoDB\n•Agile and Scrum\n•Git and GitHub\n•Junit and Mockito\n•Docker\nWORK EXPERIENCE\nFull Stack Java Developer ABC Company Inc Anywhere USA June 2018 Present\n•Developed scalable robust and maintainable enterpriselevel applications using Java and Spring\nBoot\n•Used Angular for developing dynamic and responsive web frontends improving user experience\nby 30\n•Integrated applications with MySQL and MongoDB databases to store and retrieve data efficiently\n•Collaborated in an Agile development team to deliver highquality software every sprint\n•Created RESTful services and APIs for frontend and thirdparty applications\n•Wrote unit tests using Junit and Mockito for robust testing of application components\nSoftware Developer XYZ Solutions Anywhere USA July 2016 June 2018\n•Participated in the complete software development life cycle from requirement analysis to deploy\nment\n•Implemented business logic using Java and enhanced user interface using Angular\n•Developed and maintained SQL and NoSQL databases implementing complex queries for business\nneeds\n•Utilized Git for version control and collaborated with team members via GitHub\n•Assisted in troubleshooting software debugging and system enhancements\n1 EDUCATION\nBachelor of Science in Computer Science State University Anywhere USA May 2016\nCERTIFICATIONS\n•Oracle Certified Professional Java SE 8 Programmer\n•Certified Angular Developer\n2"
},
{
"resume": "Alfred Pennyworth\nProduct ManagerSilicon Valley CA USA\n♂¶obilealt\n/envel⌢\n/linkedininapennyworth\n\nProfessional Summary\nSeasoned Product Manager with over 20 years of experience in software development and product\nmanagement having worked at all FAANG companies Exceptional leadership skills strategic\nthinking and a track record of managing products from conception to market success\nSkills\nProduct management Agile methodologies Leadership Communication Project\nmanagement User Experience Design Market Research Data Analysis Java\nPython JavaScript HTML/CSS SQL AWS\nExperience\n2017 \nPresentProduct Manager Google Mountain View CA USA\nLeading crossfunctional teams to design develop and launch innovative products Devel\noping product strategies and making datadriven decisions to improve user experience and\nmeet business goals\n2012 2017 Software Development Engineer III Amazon Seattle WA USA\nLed a team of developers in building scalable and highperforming ecommerce applications\nSuccessfully delivered multiple projects within the stipulated time and budget\n2007 2012 Software Development Engineer II Apple Cupertino CA USA\nDesigned and implemented software components for various Apple services Optimized the\nperformance of applications and improved code quality through thorough testing\n2002 2007 Software Development Engineer I Netflix Los Gatos CA USA\nDeveloped and maintained the user interface for the Netflix web application Worked closely\nwith product managers and designers to create an optimal user experience\n1999 2002 Software Development Engineer I Facebook Menlo Park CA USA\nPlayed a key role in the development of early Facebook features Implemented scalable\nbackend services using Java and SQL\nEducation\n2016 2018 Master of Business Administration Stanford University Stanford CA USA\n1997 1999 Master of Science in Computer Science Massachusetts Institute of Technology \nCambridge MA USA\n1994 1997 Bachelor of Science in Computer Science University of California Berkeley \nBerkeley CA USA\nProjects\n1/2 2020 \nPresentPersonal Project Home Automation System\nDeveloped a smart home automation system using Raspberry Pi and Python The system\nautomates various home appliances based on user behavior and preferences contributing to\nenergy saving and improved user convenience\n2/2"
},
{
"resume": "Harvey Dent\nMachine Learning Engineer321 Gotham St\nGotham USA\n♂¶obilealt\n/envel⌢\n/linkedininhdent\n/githubhdent\nProfessional Summary\nMachine Learning Engineer with 5 years of experience in designing building and deploying predictive\nmodels and deep learning systems Proficient in Python TensorFlow PyTorch and Scikitlearn\nCurrently leading a team of AI engineers at OpenAI\nSkills\nPython R TensorFlow PyTorch Scikitlearn Keras SQL NoSQL Git Docker\nKubernetes Agile and Scrum Statistics Data visualization Deep Learning Natural\nLanguage Processing\nExperience\n2021 \nPresentMachine Learning Engineer OpenAI San Francisco USA\nLeading a team of AI engineers Designed and implemented deep learning models for natural\nlanguage processing tasks Improved the efficiency of model training and data processing\npipelines Published several research papers in toptier AI conferences\n2018 2021 Data Scientist Uber San Francisco USA\nDeveloped and deployed machine learning models to improve the efficiency of ride allocation\nalgorithms Utilized TensorFlow and PyTorch for developing predictive models Analyzed\nand visualized large data sets to drive business strategies\n2016 2018 Junior Data Analyst Facebook Menlo Park USA\nAnalyzed and visualized large datasets using Python and R Assisted in the development of\nmachine learning models for user behavior prediction Conducted A/B testing and provided\ninsights to the product team\nEducation\n2014 2016 Master of Science in Computer Science Specialization in AI MIT Cambridge\nUSA\n2010 2014 Bachelor of Science in Computer Science UC San Diego San Diego USA\nProjects\n2021 \nPresentPersonal Project Predictive Stock Trading Model\nDeveloped a predictive stock trading model using deep learning and time series analysis\nUsed PyTorch for model development and Docker for deployment The model predicts stock\nprices with a high degree of accuracy and automates trading decisions"
},
{
"resume": "Bruce Wayne\nMERN Stack Developer123 Gotham St\nGotham USA\n♂¶obilealt\n/envel⌢\n/linkedininbwayne\n\nProfessional Summary\nHighly skilled MERN Stack Developer with over 10 years of experience specializing in designing\nbuilding and maintaining complex web applications Proficient in MongoDB Expressjs React and\nNodejs Currently contributing to the development of AI technologies at OpenAI with a primary\nfocus on the ChatGPT project\nSkills\nJavaScript and TypeScript MongoDB Expressjs React Nodejs MERN stack\nRESTful APIs Git and GitHub Docker and Kubernetes Agile and Scrum Python\nand Machine Learning basics\nExperience\nJune 2020 \nPresentMERN Stack Developer OpenAI San Francisco USA\nWorking on the development of the ChatGPT project using Nodejs Expressjs and React\nImplementing RESTful services for communication between frontend and backend Utilizing\nDocker and Kubernetes for deployment and management of applications Working in an\nAgile environment delivering highquality software every sprint Contributing to the design\nand implementation of machine learning algorithms for natural language processing tasks\nJuly 2015 \nMay 2020Full Stack Developer Uber San Francisco USA\nDeveloped and maintained scalable web applications using MERN stack Ensured the\nperformance quality and responsiveness of applications Successfully deployed solutions\nusing Docker and Kubernetes Collaborated with a team of engineers product managers\nand UX designers Led a team of junior developers conducted code reviews and ensured\nadherence to best coding practices Worked closely with the data science team to optimize\nrecommendation algorithms and enhance user experience\nJune 2012 \nJune 2015Software Developer Facebook Menlo Park USA\nDeveloped features for the Facebook web application using React Ensured the performance\nof the MongoDB databases Utilized RESTful APIs for communication between different\nparts of the application Worked in a fastpaced testdriven development environment\nAssisted in migrating the legacy system to a modern MERN stack architecture\nEducation\n2009 2012 PhD in Computer Science CalTech Pasadena USA\n2007 2009 Master of Science in Computer Science MIT Cambridge USA\n2003 2007 Bachelor of Science in Computer Science UC San Diego San Diego USA\n1/2 Projects\n2019 \nPresentPersonal Project Gotham Event Planner\nCreated a fullfeatured web application to plan and organize events in Gotham city Used\nMERN stack for development and Docker for deployment The application allows users to\ncreate manage and share events and integrates with Google Maps API to display event\nlocations\n2/2"
},
{
"resume": "Barry Allen\nFrontEnd DeveloperGoogle HQ Mountain View CA USA\n♂¶obilealt\n/envel⌢\n/linkedininbwayne\n\nObjective\nSeeking a challenging role as a FrontEnd Developer where I can leverage my knowledge of UI/UX\ndesign and modern web technologies to create intuitive and engaging user interfaces\nEducation\n2018 2022 BTech Computer Science and Engineering Indian Institute of Technology\nDelhi New Delhi India\nOverall GPA 95/10\nSkills\nJavaScript ES6+ TypeScript HTML5 CSS3 Python React Redux Angular\nVuejs Nodejs Expressjs D3js Git Docker Webpack Babel Google Cloud\nPlatform Firebase RESTful APIs GraphQL Agile Development TestDriven\nDevelopment Responsive Design UI/UX\nExperience\nJune 2022 \nPresentSoftware Engineer FrontEnd Google Mountain View CA USA\nDeveloping intuitive and engaging user interfaces using React and Redux Working closely\nwith UX designers to implement responsive and accessible web design Participating in\nagile development processes including sprint planning and code reviews Collaborating with\nbackend developers to integrate RESTful APIs and ensure seamless data flow\nProjects\n2022 Personal Expense Tracker\nDeveloped a personal expense tracker application using React Redux and Firebase Imple\nmented user authentication using Firebase Auth and data storage using Firestore Utilized\nD3js for data visualization to provide users with insights into their spending patterns"
},
]
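
The lists above are plain Python data, so they can be exercised without the rest of the pipeline. Below is a minimal sketch that pairs every demo resume with every demo job description using a naive token-overlap ratio; this is purely illustrative and is not the project's scorer (that lives in scripts/similarity/get_similarity_score.py), and it assumes the repository root is on sys.path so that Demo.DemoData is importable.

# Illustration only: naive pairing of the demo lists above.
from Demo.DemoData import jobs, resumes


def token_overlap(job_desc: str, resume_text: str) -> float:
    # Fraction of job-description tokens that also appear in the resume.
    job_tokens = set(job_desc.lower().split())
    resume_tokens = set(resume_text.lower().split())
    return len(job_tokens & resume_tokens) / max(len(job_tokens), 1)


for i, job in enumerate(jobs):
    for j, resume in enumerate(resumes):
        print(f"job {i} vs resume {j}: {token_overlap(job['job_desc'], resume['resume']):.2f}")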
archive/Data.py
CHANGED
@@ -1,4 +1,5 @@
import json

from scripts.utils.ReadFiles import get_filenames_from_dir

resume_path = "Data/Processed/Resumes"

@@ -14,20 +15,16 @@ def read_json(filename):
def build_resume_list(resume_names, path):
    resumes = []
    for resume in resume_names:
        selected_file = read_json(path + "/" + resume)
        resumes.append({"resume": selected_file["clean_data"]})
    return resumes


def build_jobdesc_list(jobdesc_names, path):
    resumes = []
    for resume in resume_names:
        selected_file = read_json(path + "/" + resume)
        resumes.append({"resume": selected_file["clean_data"]})
    return resumes
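
A short standalone sketch of what build_resume_list does, assuming run_first.py has already written processed resume JSON files (each exposing a "clean_data" key) into Data/Processed/Resumes:

# Sketch only: rebuild the in-memory resume list from processed JSON files.
import json
import os

resume_path = "Data/Processed/Resumes"


def read_json(filename):
    with open(filename) as f:
        return json.load(f)


resume_names = [f for f in os.listdir(resume_path) if f.endswith(".json")]
resumes = [
    {"resume": read_json(os.path.join(resume_path, name))["clean_data"]}
    for name in resume_names
]
print(f"Loaded {len(resumes)} processed resumes")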
archive/ExtraScripts.py
CHANGED
@@ -11,10 +11,10 @@ def list_to_matrix(list_to_convert, num_columns):
    matrix = []
    for i in range(len(list_to_convert) // num_columns):
        matrix.append(list_to_convert[i * num_columns : (i + 1) * num_columns])

    if len(list_to_convert) % num_columns > 0:
        matrix.append(list_to_convert[-(len(list_to_convert) % num_columns) :])

    for i in range(len(matrix)):
        for j in range(len(matrix[i])):

@@ -40,10 +40,10 @@ def split_list(list_to_split, chunk_size):
    chunks = []
    for i in range(num_chunks):
        chunks.append(list_to_split[i * chunk_size : (i + 1) * chunk_size])

    if remainder > 0:
        chunks.append(list_to_split[num_chunks * chunk_size :])

    return chunks

@@ -53,10 +53,17 @@ def dirty_intersection(list1, list2):
    remainder_1 = [x for x in list1 if x not in intersection]
    remainder_2 = [x for x in list2 if x not in intersection]

    output = pd.DataFrame(
        {
            "elements": [
                "Common words",
                "Words unique to Resume",
                "Words unique to Job Description",
            ],
            "values": [len(intersection), len(remainder_1), len(remainder_2)],
        },
        index=[1, 2, 3],
    )

    return output

@@ -82,26 +89,27 @@ def find_intersection_of_lists(list1, list2):
        return max_value

    def fill_by_complements(num: int, list_to_fill: list):
        if num > len(list_to_fill):
            for i in range(num - len(list_to_fill)):
                list_to_fill.append(" ")

    intersection = list(set(list1) & set(list2))
    remainder_1 = [x for x in list1 if x not in intersection]
    remainder_2 = [x for x in list2 if x not in intersection]

    max_count = max_of_three(len(intersection), len(remainder_1), len(remainder_2))

    fill_by_complements(max_count, intersection)
    fill_by_complements(max_count, remainder_1)
    fill_by_complements(max_count, remainder_2)

    df = pd.DataFrame(
        {
            "intersection": intersection,
            "remainder_1": remainder_1,
            "remainder_2": remainder_2,
        }
    )

    return df
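
The slicing arithmetic in split_list is easiest to see on a tiny input. A self-contained sketch that reproduces the same chunking logic (num_chunks and remainder are computed the way the function's earlier, unchanged lines imply):

# Sketch of the chunking arithmetic used by split_list above.
def split_list(list_to_split, chunk_size):
    num_chunks = len(list_to_split) // chunk_size
    remainder = len(list_to_split) % chunk_size

    chunks = []
    for i in range(num_chunks):
        chunks.append(list_to_split[i * chunk_size : (i + 1) * chunk_size])

    if remainder > 0:
        chunks.append(list_to_split[num_chunks * chunk_size :])

    return chunks


print(split_list([1, 2, 3, 4, 5, 6, 7], 3))  # [[1, 2, 3], [4, 5, 6], [7]]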
archive/run.py
CHANGED
@@ -1,17 +1,17 @@
import json
import os.path
import pathlib

from scripts.parsers.ParseJobDescToJson import ParseJobDesc
from scripts.parsers.ParseResumeToJson import ParseResume
from scripts.ReadPdf import read_single_pdf

READ_DATA_FROM = "Data/Raw/"
SAVE_DIRECTORY = "Data/Processed/"


def read_resumes(input_file: str) -> dict:
    input_file_name = os.path.join(READ_DATA_FROM + input_file)
    data = read_single_pdf(input_file_name)
    output = ParseResume(data).get_JSON()
    return output
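
A hedged usage sketch for read_resumes; the file name is a placeholder, and the call assumes a matching PDF sits in Data/Raw/ and that the archive module can be imported alongside the scripts package:

# Sketch only: parse one raw resume PDF into the JSON-style dict.
from archive.run import read_resumes

parsed = read_resumes("sample_resume.pdf")  # hypothetical file in Data/Raw/
print(sorted(parsed.keys()))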
archive/streamlit_app.py
CHANGED
@@ -1,17 +1,18 @@
import json
import string

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pywaffle
import spacy
import squarify
import streamlit as st

st.title("Resume :blue[Matcher]")
st.image("Assets/img/header_image.jpg")
st.subheader("_AI Based Resume Analyzer & Ranker_")


def read_json(filename):

@@ -21,38 +22,49 @@ def read_json(filename):
# read the json file
resume = read_json("Data/Processed/Resume-d531571e-e4fa-45eb-ab6a-267cdeb6647e.json")
job_desc = read_json(
    "Data/Processed/Job-Desc-a4f06ccb-8d5a-4d0b-9f02-3ba6d686472e.json"
)

st.write("### Reading Resume's POS")
df = pd.DataFrame(resume["pos_frequencies"], index=[0])
fig = go.Figure(
    data=go.Bar(
        y=list(resume["pos_frequencies"].values()),
        x=list(resume["pos_frequencies"].keys()),
    ),
    layout_title_text="Resume's POS",
)
st.write(fig)

df2 = pd.DataFrame(resume["keyterms"], columns=["keyword", "value"])
st.dataframe(df2)

# Create the dictionary
keyword_dict = {}
for keyword, value in resume["keyterms"]:
    keyword_dict[keyword] = value

fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Keyword", "Value"], font=dict(size=12), fill_color="#070A52"
            ),
            cells=dict(
                values=[list(keyword_dict.keys()), list(keyword_dict.values())],
                line_color="darkslategray",
                fill_color="#6DA9E4",
            ),
        )
    ]
)
st.plotly_chart(fig)

st.divider()

for keyword, value in resume["keyterms"]:
    pass

@@ -62,7 +74,8 @@ figure = plt.figure(
    rows=20,
    columns=20,
    values=keyword_dict,
    legend={"loc": "upper left", "bbox_to_anchor": (1, 1)},
)

@@ -70,32 +83,48 @@ figure = plt.figure(
st.pyplot(fig=figure)
# st.write(dict)

fig = px.treemap(
    df2,
    path=["keyword"],
    values="value",
    color_continuous_scale="RdBu",
    title="Resume POS",
)
st.write(fig)


st.plotly_chart(figure_or_data=fig)

fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Tri Grams"],
                fill_color="#1D267D",
                align="center",
                font=dict(color="white", size=16),
            ),
            cells=dict(
                values=[resume["tri_grams"]], fill_color="#19A7CE", align="left"
            ),
        )
    ]
)

st.plotly_chart(figure_or_data=fig)

fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Bi Grams"],
                fill_color="#1D267D",
                align="center",
                font=dict(color="white", size=16),
            ),
            cells=dict(values=[resume["bi_grams"]], fill_color="#19A7CE", align="left"),
        )
    ]
)

st.plotly_chart(figure_or_data=fig)
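
The tables and treemap above assume resume["keyterms"] is a list of (keyword, score) pairs. A standalone sketch of the same DataFrame/treemap construction with dummy values, runnable outside Streamlit as long as pandas and plotly are installed:

# Standalone sketch with dummy keyterms, mirroring the treemap built above.
import pandas as pd
import plotly.express as px

keyterms = [("machine learning", 0.31), ("python", 0.27), ("data analysis", 0.18)]  # dummy scores
df2 = pd.DataFrame(keyterms, columns=["keyword", "value"])

fig = px.treemap(df2, path=["keyword"], values="value", title="Resume POS")
fig.show()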
run_first.py
CHANGED
@@ -1,19 +1,22 @@
import json
import logging
import os

from scripts import JobDescriptionProcessor, ResumeProcessor
from scripts.utils import get_filenames_from_dir, init_logging_config

init_logging_config()

PROCESSED_RESUMES_PATH = "Data/Processed/Resumes"
PROCESSED_JOB_DESCRIPTIONS_PATH = "Data/Processed/JobDescription"


def read_json(filename):
    with open(filename) as f:
        data = json.load(f)
    return data


def remove_old_files(files_path):

    for filename in os.listdir(files_path):

@@ -22,56 +25,53 @@ def remove_old_files(files_path):
            if os.path.isfile(file_path):
                os.remove(file_path)
        except Exception as e:
            logging.error(f"Error deleting {file_path}:\n{e}")

    logging.info("Deleted old files from " + files_path)


logging.info("Started to read from Data/Resumes")
try:
    # Check if there are resumes present or not.
    # If present then parse it.
    remove_old_files(PROCESSED_RESUMES_PATH)

    file_names = get_filenames_from_dir("Data/Resumes")
    logging.info("Reading from Data/Resumes is now complete.")
except:
    # Exit the program if there are no resumes.
    logging.error("There are no resumes present in the specified folder.")
    logging.error("Exiting from the program.")
    logging.error("Please add resumes in the Data/Resumes folder and try again.")
    exit(1)

# Now after getting the file_names parse the resumes into a JSON Format.
logging.info("Started parsing the resumes.")
for file in file_names:
    processor = ResumeProcessor(file)
    success = processor.process()
logging.info("Parsing of the resumes is now complete.")

logging.info("Started to read from Data/JobDescription")
try:
    # Check if there are resumes present or not.
    # If present then parse it.
    remove_old_files(PROCESSED_JOB_DESCRIPTIONS_PATH)

    file_names = get_filenames_from_dir("Data/JobDescription")
    logging.info("Reading from Data/JobDescription is now complete.")
except:
    # Exit the program if there are no resumes.
    logging.error("There are no job-description present in the specified folder.")
    logging.error("Exiting from the program.")
    logging.error("Please add resumes in the Data/JobDescription folder and try again.")
    exit(1)

# Now after getting the file_names parse the resumes into a JSON Format.
logging.info("Started parsing the Job Descriptions.")
for file in file_names:
    processor = JobDescriptionProcessor(file)
    success = processor.process()
logging.info("Parsing of the Job Descriptions is now complete.")
logging.info("Success now run `streamlit run streamlit_second.py`")
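
run_first.py now calls init_logging_config() from scripts.utils before doing any work. The real helper lives in scripts/utils/logger.py, which is not reproduced in this section; the snippet below is only an assumption about the general shape such a configuration takes, not the project's actual implementation.

# Assumed shape of an init_logging_config() helper; the project's real one is in scripts/utils/logger.py.
import logging


def init_logging_config(level: int = logging.INFO) -> None:
    # Send log records to both the console and a file.
    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        handlers=[logging.StreamHandler(), logging.FileHandler("app.log")],
    )


init_logging_config()
logging.info("Started to read from Data/Resumes")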
scripts/Extractor.py
CHANGED
@@ -1,43 +1,44 @@
import re
import urllib.request

import spacy

from .utils import TextCleaner

# Load the English model
nlp = spacy.load("en_core_web_sm")


RESUME_SECTIONS = [
    "Contact Information",
    "Objective",
    "Summary",
    "Education",
    "Experience",
    "Skills",
    "Projects",
    "Certifications",
    "Licenses",
    "Awards",
    "Honors",
    "Publications",
    "References",
    "Technical Skills",
    "Computer Skills",
    "Programming Languages",
    "Software Skills",
    "Soft Skills",
    "Language Skills",
    "Professional Skills",
    "Transferable Skills",
    "Work Experience",
    "Professional Experience",
    "Employment History",
    "Internship Experience",
    "Volunteer Experience",
    "Leadership Experience",
    "Research Experience",
    "Teaching Experience",
]

@@ -68,7 +69,7 @@ class DataExtractor:
        Returns:
            list: A list containing all the found links.
        """
        link_pattern = r"\b(?:https?://|www\.)\S+\b"
        links = re.findall(link_pattern, self.text)
        return links

@@ -86,19 +87,28 @@ class DataExtractor:
        links = []
        try:
            response = urllib.request.urlopen(self.text)
            html_content = response.read().decode("utf-8")
            pattern = r'href=[\'"]?([^\'" >]+)'
            raw_links = re.findall(pattern, html_content)
            for link in raw_links:
                if link.startswith(
                    (
                        "http://",
                        "https://",
                        "ftp://",
                        "mailto:",
                        "www.linkedin.com",
                        "github.com/",
                        "twitter.com",
                    )
                ):
                    links.append(link)
        except Exception as e:
            print(f"Error extracting links: {str(e)}")
        return links

    def extract_names(self):
        """Extracts and returns a list of names from the given
        text using spaCy's named entity recognition.

        Args:

@@ -107,7 +117,7 @@ class DataExtractor:
        Returns:
            list: A list of strings representing the names extracted from the text.
        """
        names = [ent.text for ent in self.doc.ents if ent.label_ == "PERSON"]
        return names

    def extract_emails(self):

@@ -120,7 +130,7 @@ class DataExtractor:
        Returns:
            list: A list containing all the extracted email addresses.
        """
        email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
        emails = re.findall(email_pattern, self.text)
        return emails

@@ -134,7 +144,9 @@ class DataExtractor:
        Returns:
            list: A list containing all the extracted phone numbers.
        """
        phone_number_pattern = (
            r"^(\+\d{1,3})?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}$"
        )
        phone_numbers = re.findall(phone_number_pattern, self.text)
        return phone_numbers

@@ -153,7 +165,7 @@ class DataExtractor:
        for token in self.doc:
            if token.text in RESUME_SECTIONS:
                if token.text == "Experience" or "EXPERIENCE" or "experience":
                    in_experience_section = True
                else:
                    in_experience_section = False

@@ -161,21 +173,22 @@ class DataExtractor:
            if in_experience_section:
                experience_section.append(token.text)

        return " ".join(experience_section)

    def extract_position_year(self):
        """
        Extract position and year from a given string.

        Args:
            text (str): The string from which to extract position and year.

        Returns:
            list: A list containing the extracted position and year.
        """
        position_year_search_pattern = (
            r"(\b\w+\b\s+\b\w+\b),\s+(\d{4})\s*-\s*(\d{4}|\bpresent\b)"
        )
        position_year = re.findall(position_year_search_pattern, self.text)
        return position_year

    def extract_particular_words(self):

@@ -188,7 +201,7 @@ class DataExtractor:
        Returns:
            list: A list of extracted nouns.
        """
        pos_tags = ["NOUN", "PROPN"]
        nouns = [token.text for token in self.doc if token.pos_ in pos_tags]
        return nouns

@@ -202,7 +215,8 @@ class DataExtractor:
        Returns:
            list: A list of extracted entities.
        """
        entity_labels = ["GPE", "ORG"]
        entities = [
            token.text for token in self.doc.ents if token.label_ in entity_labels
        ]
        return list(set(entities))
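
A quick standalone check of the email and link regular expressions used above, run against a throwaway string rather than through the DataExtractor class:

# Standalone regex check; patterns copied from the extractor above.
import re

sample = "Contact johndoe@example.com or see https://github.com/johndoe for code."

email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
link_pattern = r"\b(?:https?://|www\.)\S+\b"

print(re.findall(email_pattern, sample))  # ['johndoe@example.com']
print(re.findall(link_pattern, sample))   # ['https://github.com/johndoe']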
scripts/JobDescriptionProcessor.py
CHANGED
@@ -1,19 +1,18 @@
import json
import os.path
import pathlib

from .parsers import ParseJobDesc, ParseResume
from .ReadPdf import read_single_pdf

READ_JOB_DESCRIPTION_FROM = "Data/JobDescription/"
SAVE_DIRECTORY = "Data/Processed/JobDescription"


class JobDescriptionProcessor:
    def __init__(self, input_file):
        self.input_file = input_file
        self.input_file_name = os.path.join(READ_JOB_DESCRIPTION_FROM + self.input_file)

    def process(self) -> bool:
        try:

@@ -35,8 +34,12 @@ class JobDescriptionProcessor:
        return output

    def _write_json_file(self, resume_dictionary: dict):
        file_name = str(
            "JobDescription-"
            + self.input_file
            + resume_dictionary["unique_id"]
            + ".json"
        )
        save_directory_name = pathlib.Path(SAVE_DIRECTORY) / file_name
        json_object = json.dumps(resume_dictionary, sort_keys=True, indent=14)
        with open(save_directory_name, "w+") as outfile:
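
A usage sketch, assuming a job-description PDF is already present in Data/JobDescription/ and that Data/Processed/JobDescription exists; the file name is a placeholder:

# Sketch only: process a single job-description PDF into Data/Processed/JobDescription.
from scripts.JobDescriptionProcessor import JobDescriptionProcessor

processor = JobDescriptionProcessor("sample_job_description.pdf")  # hypothetical file name
ok = processor.process()
print("written" if ok else "failed")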
scripts/KeytermsExtraction.py
CHANGED
@@ -16,8 +16,7 @@ class KeytermExtractor:
            top_n_values (int): The number of top keyterms to extract.
        """
        self.raw_text = raw_text
        self.text_doc = textacy.make_spacy_doc(self.raw_text, lang="en_core_web_md")
        self.top_n_values = top_n_values

    def get_keyterms_based_on_textrank(self):

@@ -27,8 +26,11 @@
        Returns:
            List[str]: A list of top keyterms based on TextRank.
        """
        return list(
            extract.keyterms.textrank(
                self.text_doc, normalize="lemma", topn=self.top_n_values
            )
        )

    def get_keyterms_based_on_sgrank(self):
        """

@@ -37,8 +39,11 @@
        Returns:
            List[str]: A list of top keyterms based on SGRank.
        """
        return list(
            extract.keyterms.sgrank(
                self.text_doc, normalize="lemma", topn=self.top_n_values
            )
        )

    def get_keyterms_based_on_scake(self):
        """

@@ -47,8 +52,11 @@
        Returns:
            List[str]: A list of top keyterms based on sCAKE.
        """
        return list(
            extract.keyterms.scake(
                self.text_doc, normalize="lemma", topn=self.top_n_values
            )
        )

    def get_keyterms_based_on_yake(self):
        """

@@ -57,8 +65,11 @@
        Returns:
            List[str]: A list of top keyterms based on YAKE.
        """
        return list(
            extract.keyterms.yake(
                self.text_doc, normalize="lemma", topn=self.top_n_values
            )
        )

    def bi_gramchunker(self):
        """

@@ -67,8 +78,15 @@
        Returns:
            List[str]: A list of bigrams.
        """
        return list(
            textacy.extract.basics.ngrams(
                self.text_doc,
                n=2,
                filter_stops=True,
                filter_nums=True,
                filter_punct=True,
            )
        )

    def tri_gramchunker(self):
        """

@@ -77,5 +95,12 @@
        Returns:
            List[str]: A list of trigrams.
        """
        return list(
            textacy.extract.basics.ngrams(
                self.text_doc,
                n=3,
                filter_stops=True,
                filter_nums=True,
                filter_punct=True,
            )
        )
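
A usage sketch for the extractor above, assuming textacy and the en_core_web_md spaCy model are installed and that the constructor signature matches the docstring shown (raw text plus top_n_values):

# Sketch only: pull key terms and bigrams from a short text with the class above.
from scripts.KeytermsExtraction import KeytermExtractor

text = (
    "Seasoned product manager with experience in agile software development, "
    "data analysis and cross-functional team leadership."
)
extractor = KeytermExtractor(text, top_n_values=5)
print(extractor.get_keyterms_based_on_textrank())  # [(term, score), ...]
print(extractor.bi_gramchunker())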
scripts/LinkedinJobToPDF.py
CHANGED

@@ -8,7 +8,7 @@ from bs4 import BeautifulSoup
 from pathvalidate import sanitize_filename
 from xhtml2pdf import pisa
 
-
+"""
 This script takes a LinkedIn job posting URL
 and converts the description to a PDF file.
 The PDF file is saved in the Data/JobDescription folder.
@@ -16,7 +16,7 @@ The name will be OrgName__Job Title_X.pdf, where X is the number of files in the
 
 IMPORTANT: Make sure the URL is to the actual job description,
 and not the job search page.
-
+"""
 
 
 def linkedin_to_pdf(job_url: str):
@@ -29,26 +29,30 @@ def linkedin_to_pdf(job_url: str):
     page = requests.get(job_url)
 
     if page.status_code != 200:
-        print(
+        print(
+            f"Failed to retrieve the job posting at {job_url}. Status code: {page.status_code}"
+        )
         return
 
     # Parse the HTML content of the job posting using BeautifulSoup
-    soup = BeautifulSoup(page.text,
+    soup = BeautifulSoup(page.text, "html.parser")
 
     # Find the job title element and get the text
-    job_title = soup.find(
+    job_title = soup.find("h1", {"class": "topcard__title"}).text.strip()
 
     # Find the organization name element (try both selectors)
-    organization_element = soup.find(
+    organization_element = soup.find("span", {"class": "topcard__flavor"})
 
     if not organization_element:
-        organization_element = soup.find(
+        organization_element = soup.find("a", {"class": "topcard__org-name-link"})
 
     # Extract the organization name
     organization = organization_element.text.strip()
 
     # Find the job description element
-    job_description_element = soup.find(
+    job_description_element = soup.find(
+        "div", {"class": "show-more-less-html__markup"}
+    )
 
     # Extract the job description and concatenate its elements
     if job_description_element:
@@ -59,8 +63,8 @@ def linkedin_to_pdf(job_url: str):
     file_path = f"{job_path}{sanitize_filename(organization + '__' + job_title)}_{files_number}.pdf"
 
     # Create a PDF file and write the job description to it
-    with open(file_path,
-    pisa.CreatePDF(job_description, dest=pdf_file, encoding=
+    with open(file_path, "wb") as pdf_file:
+        pisa.CreatePDF(job_description, dest=pdf_file, encoding="utf-8")
 
     logging.info("PDF saved to " + file_path)
 
@@ -72,4 +76,4 @@ def linkedin_to_pdf(job_url: str):
 
 if __name__ == "__main__":
     url = easygui.enterbox("Enter the URL of the LinkedIn Job Posting:").strip()
-    linkedin_to_pdf(url)
+    linkedin_to_pdf(url)

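A minimal sketch of calling the converter without the easygui prompt (the job URL below is a hypothetical placeholder, and the topcard/show-more-less selectors above only hold for as long as LinkedIn keeps that markup):

from scripts.LinkedinJobToPDF import linkedin_to_pdf

# Pass a direct job-posting URL, not a search page; the PDF lands in Data/JobDescription.
linkedin_to_pdf("https://www.linkedin.com/jobs/view/1234567890")  # placeholder job id
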
scripts/ReadPdf.py
CHANGED

@@ -1,5 +1,6 @@
-import os
 import glob
+import os
+
 from pypdf import PdfReader
 
 
@@ -14,7 +15,7 @@ def get_pdf_files(file_path):
         list: A list containing the paths of all the PDF files in the directory.
     """
     if os.path.exists(file_path):
-        return glob.glob(os.path.join(file_path,
+        return glob.glob(os.path.join(file_path, "*.pdf"))
     else:
         return []
 
@@ -33,7 +34,7 @@ def read_multiple_pdf(file_path: str) -> list:
     output = []
     for file in pdf_files:
         try:
-            with open(file,
+            with open(file, "rb") as f:
                 pdf_reader = PdfReader(f)
                 count = pdf_reader.getNumPages()
                 for i in range(count):
@@ -56,7 +57,7 @@ def read_single_pdf(file_path: str) -> str:
     """
     output = []
     try:
-        with open(file_path,
+        with open(file_path, "rb") as f:
             pdf_reader = PdfReader(f)
             count = len(pdf_reader.pages)
             for i in range(count):
@@ -79,7 +80,7 @@ def get_pdf_files(file_path: str) -> list:
     """
     pdf_files = []
     try:
-        pdf_files = glob.glob(os.path.join(file_path,
+        pdf_files = glob.glob(os.path.join(file_path, "*.pdf"))
     except Exception as e:
         print(f"Error getting PDF files from '{file_path}': {str(e)}")
     return pdf_files

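A minimal sketch of the two entry points (the directory path is only an example, though the project does keep resumes under Data/Resumes/):

from scripts.ReadPdf import get_pdf_files, read_single_pdf

for pdf_path in get_pdf_files("Data/Resumes/"):
    text = read_single_pdf(pdf_path)   # concatenated page text as a single string
    print(pdf_path, len(text))
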
scripts/ResumeProcessor.py
CHANGED

@@ -1,12 +1,12 @@
-
-from .parsers import ParseJobDesc
-from .ReadPdf import read_single_pdf
+import json
 import os.path
 import pathlib
-import json
 
-
-
+from .parsers import ParseJobDesc, ParseResume
+from .ReadPdf import read_single_pdf
+
+READ_RESUME_FROM = "Data/Resumes/"
+SAVE_DIRECTORY = "Data/Processed/Resumes"
 
 
 class ResumeProcessor:
@@ -34,8 +34,9 @@ class ResumeProcessor:
         return output
 
     def _write_json_file(self, resume_dictionary: dict):
-        file_name = str(
-
+        file_name = str(
+            "Resume-" + self.input_file + resume_dictionary["unique_id"] + ".json"
+        )
         save_directory_name = pathlib.Path(SAVE_DIRECTORY) / file_name
         json_object = json.dumps(resume_dictionary, sort_keys=True, indent=14)
         with open(save_directory_name, "w+") as outfile:

scripts/TextCleaner.py
CHANGED

@@ -1,15 +1,15 @@
+import string
+
 import nltk
 from nltk.corpus import stopwords
-from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
-import
+from nltk.tokenize import word_tokenize
 
 
 class TextCleaner:
 
     def __init__(self, raw_text):
-        self.stopwords_set = set(stopwords.words(
-            'english') + list(string.punctuation))
+        self.stopwords_set = set(stopwords.words("english") + list(string.punctuation))
         self.lemmatizer = WordNetLemmatizer()
         self.raw_input_text = raw_text
 
@@ -17,5 +17,5 @@ class TextCleaner:
         tokens = word_tokenize(self.raw_input_text.lower())
         tokens = [token for token in tokens if token not in self.stopwords_set]
         tokens = [self.lemmatizer.lemmatize(token) for token in tokens]
-        cleaned_text =
+        cleaned_text = " ".join(tokens)
        return cleaned_text

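For completeness, a hedged sketch of the NLTK resources this cleaner relies on; the corpus names are the standard NLTK identifiers for the tokenizer, stopword and lemmatizer calls above, not anything defined in this file:

import nltk

# One-time downloads assumed by word_tokenize, stopwords.words and WordNetLemmatizer.
for pkg in ("punkt", "stopwords", "wordnet"):
    nltk.download(pkg, quiet=True)

from scripts.TextCleaner import TextCleaner

cleaner = TextCleaner("Fixing PDFs, e-mails and other noisy text!")
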
scripts/__init__.py
CHANGED

@@ -1,3 +1,3 @@
 from . import ReadPdf
 from .JobDescriptionProcessor import JobDescriptionProcessor
-from .ResumeProcessor import ResumeProcessor
+from .ResumeProcessor import ResumeProcessor

scripts/parsers/ParseJobDescToJson.py
CHANGED

@@ -1,9 +1,10 @@
 import json
+import os
 import pathlib
+
 from scripts.Extractor import DataExtractor
-from scripts.utils.Utils import TextCleaner, CountFrequency, generate_unique_id
 from scripts.KeytermsExtraction import KeytermExtractor
-import
+from scripts.utils.Utils import CountFrequency, TextCleaner, generate_unique_id
 
 SAVE_DIRECTORY = "../../Data/Processed/JobDescription"
 
@@ -12,15 +13,11 @@ class ParseJobDesc:
 
     def __init__(self, job_desc: str):
         self.job_desc_data = job_desc
-        self.clean_data = TextCleaner.clean_text(
-            self.job_desc_data)
+        self.clean_data = TextCleaner.clean_text(self.job_desc_data)
         self.entities = DataExtractor(self.clean_data).extract_entities()
-        self.key_words = DataExtractor(
-
-        self.
-            self.clean_data).count_frequency()
-        self.keyterms = KeytermExtractor(
-            self.clean_data).get_keyterms_based_on_sgrank()
+        self.key_words = DataExtractor(self.clean_data).extract_particular_words()
+        self.pos_frequencies = CountFrequency(self.clean_data).count_frequency()
+        self.keyterms = KeytermExtractor(self.clean_data).get_keyterms_based_on_sgrank()
         self.bi_grams = KeytermExtractor(self.clean_data).bi_gramchunker()
         self.tri_grams = KeytermExtractor(self.clean_data).tri_gramchunker()
 
@@ -37,7 +34,7 @@ class ParseJobDesc:
             "keyterms": self.keyterms,
             "bi_grams": str(self.bi_grams),
             "tri_grams": str(self.tri_grams),
-            "pos_frequencies": self.pos_frequencies
+            "pos_frequencies": self.pos_frequencies,
         }
 
         return job_desc_dictionary

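A hedged sketch of driving the parser directly (assumes the spaCy model loaded in scripts/utils/Utils.py is installed; the JSON-writing side of the class sits outside the hunks shown here):

from scripts.parsers import ParseJobDesc

parsed = ParseJobDesc("Product Manager, 10+ years of experience, owns roadmap and pricing.")
print(parsed.keyterms)         # SGRank keyterms via KeytermExtractor
print(parsed.pos_frequencies)  # POS counts via CountFrequency
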
scripts/parsers/ParseResumeToJson.py
CHANGED

@@ -1,32 +1,29 @@
 import json
-from scripts.Extractor import DataExtractor
-from scripts.utils.Utils import TextCleaner, CountFrequency, generate_unique_id
-from scripts.KeytermsExtraction import KeytermExtractor
-import os.path
 import os
+import os.path
 import pathlib
 
-
+from scripts.Extractor import DataExtractor
+from scripts.KeytermsExtraction import KeytermExtractor
+from scripts.utils.Utils import CountFrequency, TextCleaner, generate_unique_id
+
+SAVE_DIRECTORY = "../../Data/Processed/Resumes"
 
 
 class ParseResume:
 
     def __init__(self, resume: str):
         self.resume_data = resume
-        self.clean_data = TextCleaner.clean_text(
-            self.resume_data)
+        self.clean_data = TextCleaner.clean_text(self.resume_data)
         self.entities = DataExtractor(self.clean_data).extract_entities()
         self.name = DataExtractor(self.clean_data[:30]).extract_names()
         self.experience = DataExtractor(self.clean_data).extract_experience()
         self.emails = DataExtractor(self.resume_data).extract_emails()
         self.phones = DataExtractor(self.resume_data).extract_phone_numbers()
         self.years = DataExtractor(self.clean_data).extract_position_year()
-        self.key_words = DataExtractor(
-
-        self.
-            self.clean_data).count_frequency()
-        self.keyterms = KeytermExtractor(
-            self.clean_data).get_keyterms_based_on_sgrank()
+        self.key_words = DataExtractor(self.clean_data).extract_particular_words()
+        self.pos_frequencies = CountFrequency(self.clean_data).count_frequency()
+        self.keyterms = KeytermExtractor(self.clean_data).get_keyterms_based_on_sgrank()
         self.bi_grams = KeytermExtractor(self.clean_data).bi_gramchunker()
         self.tri_grams = KeytermExtractor(self.clean_data).tri_gramchunker()
 
@@ -48,7 +45,7 @@ class ParseResume:
             "years": self.years,
             "bi_grams": str(self.bi_grams),
             "tri_grams": str(self.tri_grams),
-            "pos_frequencies": self.pos_frequencies
+            "pos_frequencies": self.pos_frequencies,
         }
 
         return resume_dictionary

scripts/parsers/__init__.py
CHANGED

@@ -1,2 +1,2 @@
 from .ParseJobDescToJson import ParseJobDesc
-from .ParseResumeToJson import ParseResume
+from .ParseResumeToJson import ParseResume

scripts/similarity/__init__.py
CHANGED

@@ -1 +1 @@
-from .get_similarity_score import
+from .get_similarity_score import find_path, get_similarity_score, read_config

scripts/similarity/get_score.py
CHANGED
|
@@ -6,49 +6,65 @@ from typing import List
|
|
| 6 |
import yaml
|
| 7 |
from qdrant_client import QdrantClient
|
| 8 |
|
| 9 |
-
|
| 10 |
-
filename='app_similarity_score.log',
|
| 11 |
-
filemode='w',
|
| 12 |
-
level=logging.INFO,
|
| 13 |
-
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
| 14 |
-
)
|
| 15 |
|
|
|
|
|
|
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
-
logger.setLevel(logging.DEBUG)
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
console_handler.setFormatter(formatter)
|
| 22 |
-
console_handler.setLevel(logging.DEBUG)
|
| 23 |
-
|
| 24 |
-
file_handler = logging.FileHandler("app_similarity_score.log")
|
| 25 |
-
file_handler.setLevel(logging.DEBUG)
|
| 26 |
-
file_handler.setFormatter(formatter)
|
| 27 |
-
|
| 28 |
-
logger.addHandler(file_handler)
|
| 29 |
-
logger.addHandler(console_handler)
|
| 30 |
|
| 31 |
|
| 32 |
def find_path(folder_name):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
curr_dir = os.getcwd()
|
| 34 |
while True:
|
| 35 |
if folder_name in os.listdir(curr_dir):
|
| 36 |
return os.path.join(curr_dir, folder_name)
|
| 37 |
else:
|
| 38 |
parent_dir = os.path.dirname(curr_dir)
|
| 39 |
-
if parent_dir ==
|
| 40 |
break
|
| 41 |
curr_dir = parent_dir
|
| 42 |
raise ValueError(f"Folder '{folder_name}' not found.")
|
| 43 |
|
| 44 |
|
| 45 |
-
cwd = find_path(
|
| 46 |
-
READ_RESUME_FROM = os.path.join(cwd,
|
| 47 |
-
READ_JOB_DESCRIPTION_FROM = os.path.join(cwd,
|
| 48 |
config_path = os.path.join(cwd, "scripts", "similarity")
|
| 49 |
|
| 50 |
|
| 51 |
def read_config(filepath):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
try:
|
| 53 |
with open(filepath) as f:
|
| 54 |
config = yaml.safe_load(f)
|
|
@@ -56,23 +72,55 @@ def read_config(filepath):
|
|
| 56 |
except FileNotFoundError as e:
|
| 57 |
logger.error(f"Configuration file {filepath} not found: {e}")
|
| 58 |
except yaml.YAMLError as e:
|
| 59 |
-
logger.error(
|
|
|
|
|
|
|
| 60 |
except Exception as e:
|
| 61 |
logger.error(f"Error reading configuration file {filepath}: {e}")
|
| 62 |
return None
|
| 63 |
|
| 64 |
|
| 65 |
def read_doc(path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
with open(path) as f:
|
| 67 |
try:
|
| 68 |
data = json.load(f)
|
| 69 |
except Exception as e:
|
| 70 |
-
logger.error(f
|
| 71 |
data = {}
|
| 72 |
return data
|
| 73 |
|
| 74 |
|
| 75 |
def get_score(resume_string, job_description_string):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
logger.info("Started getting similarity score")
|
| 77 |
|
| 78 |
documents: List[str] = [resume_string]
|
|
@@ -85,8 +133,7 @@ def get_score(resume_string, job_description_string):
|
|
| 85 |
)
|
| 86 |
|
| 87 |
search_result = client.query(
|
| 88 |
-
collection_name="demo_collection",
|
| 89 |
-
query_text=job_description_string
|
| 90 |
)
|
| 91 |
logger.info("Finished getting similarity score")
|
| 92 |
return search_result
|
|
@@ -95,14 +142,18 @@ def get_score(resume_string, job_description_string):
|
|
| 95 |
if __name__ == "__main__":
|
| 96 |
# To give your custom resume use this code
|
| 97 |
resume_dict = read_config(
|
| 98 |
-
READ_RESUME_FROM
|
|
|
|
|
|
|
| 99 |
job_dict = read_config(
|
| 100 |
-
READ_JOB_DESCRIPTION_FROM
|
|
|
|
|
|
|
| 101 |
resume_keywords = resume_dict["extracted_keywords"]
|
| 102 |
job_description_keywords = job_dict["extracted_keywords"]
|
| 103 |
|
| 104 |
-
resume_string =
|
| 105 |
-
jd_string =
|
| 106 |
final_result = get_score(resume_string, jd_string)
|
| 107 |
for r in final_result:
|
| 108 |
print(r.score)
|
|
|
|
| 6 |
import yaml
|
| 7 |
from qdrant_client import QdrantClient
|
| 8 |
|
| 9 |
+
from scripts.utils.logger import init_logging_config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
init_logging_config(basic_log_level=logging.INFO)
|
| 12 |
+
# Get the logger
|
| 13 |
logger = logging.getLogger(__name__)
|
|
|
|
| 14 |
|
| 15 |
+
# Set the logging level
|
| 16 |
+
logger.setLevel(logging.INFO)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def find_path(folder_name):
|
| 20 |
+
"""
|
| 21 |
+
The function `find_path` searches for a folder by name starting from the current directory and
|
| 22 |
+
traversing up the directory tree until the folder is found or the root directory is reached.
|
| 23 |
+
|
| 24 |
+
Args:
|
| 25 |
+
folder_name: The `find_path` function you provided is designed to search for a folder by name
|
| 26 |
+
starting from the current working directory and moving up the directory tree until it finds the
|
| 27 |
+
folder or reaches the root directory.
|
| 28 |
+
|
| 29 |
+
Returns:
|
| 30 |
+
The `find_path` function is designed to search for a folder with the given `folder_name` starting
|
| 31 |
+
from the current working directory (`os.getcwd()`). It iterates through the directory structure,
|
| 32 |
+
checking if the folder exists in the current directory or any of its parent directories. If the
|
| 33 |
+
folder is found, it returns the full path to that folder using `os.path.join(curr_dir, folder_name)`
|
| 34 |
+
"""
|
| 35 |
curr_dir = os.getcwd()
|
| 36 |
while True:
|
| 37 |
if folder_name in os.listdir(curr_dir):
|
| 38 |
return os.path.join(curr_dir, folder_name)
|
| 39 |
else:
|
| 40 |
parent_dir = os.path.dirname(curr_dir)
|
| 41 |
+
if parent_dir == "/":
|
| 42 |
break
|
| 43 |
curr_dir = parent_dir
|
| 44 |
raise ValueError(f"Folder '{folder_name}' not found.")
|
| 45 |
|
| 46 |
|
| 47 |
+
cwd = find_path("Resume-Matcher")
|
| 48 |
+
READ_RESUME_FROM = os.path.join(cwd, "Data", "Processed", "Resumes")
|
| 49 |
+
READ_JOB_DESCRIPTION_FROM = os.path.join(cwd, "Data", "Processed", "JobDescription")
|
| 50 |
config_path = os.path.join(cwd, "scripts", "similarity")
|
| 51 |
|
| 52 |
|
| 53 |
def read_config(filepath):
|
| 54 |
+
"""
|
| 55 |
+
The `read_config` function reads a configuration file in YAML format and handles exceptions related
|
| 56 |
+
to file not found or parsing errors.
|
| 57 |
+
|
| 58 |
+
Args:
|
| 59 |
+
filepath: The `filepath` parameter in the `read_config` function is a string that represents the
|
| 60 |
+
path to the configuration file that you want to read and parse. This function attempts to open the
|
| 61 |
+
file specified by `filepath`, load its contents as YAML, and return the parsed configuration. If any
|
| 62 |
+
errors occur during
|
| 63 |
+
|
| 64 |
+
Returns:
|
| 65 |
+
The function `read_config` will return the configuration loaded from the file if successful, or
|
| 66 |
+
`None` if there was an error during the process.
|
| 67 |
+
"""
|
| 68 |
try:
|
| 69 |
with open(filepath) as f:
|
| 70 |
config = yaml.safe_load(f)
|
|
|
|
| 72 |
except FileNotFoundError as e:
|
| 73 |
logger.error(f"Configuration file {filepath} not found: {e}")
|
| 74 |
except yaml.YAMLError as e:
|
| 75 |
+
logger.error(
|
| 76 |
+
f"Error parsing YAML in configuration file {filepath}: {e}", exc_info=True
|
| 77 |
+
)
|
| 78 |
except Exception as e:
|
| 79 |
logger.error(f"Error reading configuration file {filepath}: {e}")
|
| 80 |
return None
|
| 81 |
|
| 82 |
|
| 83 |
def read_doc(path):
|
| 84 |
+
"""
|
| 85 |
+
The `read_doc` function reads a JSON file from the specified path and returns its contents, handling
|
| 86 |
+
any exceptions that may occur during the process.
|
| 87 |
+
|
| 88 |
+
Args:
|
| 89 |
+
path: The `path` parameter in the `read_doc` function is a string that represents the file path to
|
| 90 |
+
the JSON document that you want to read and load. This function reads the JSON data from the file
|
| 91 |
+
located at the specified path.
|
| 92 |
+
|
| 93 |
+
Returns:
|
| 94 |
+
The function `read_doc(path)` reads a JSON file located at the specified `path`, and returns the
|
| 95 |
+
data loaded from the file. If there is an error reading the JSON file, it logs the error message and
|
| 96 |
+
returns an empty dictionary `{}`.
|
| 97 |
+
"""
|
| 98 |
with open(path) as f:
|
| 99 |
try:
|
| 100 |
data = json.load(f)
|
| 101 |
except Exception as e:
|
| 102 |
+
logger.error(f"Error reading JSON file: {e}")
|
| 103 |
data = {}
|
| 104 |
return data
|
| 105 |
|
| 106 |
|
| 107 |
def get_score(resume_string, job_description_string):
|
| 108 |
+
"""
|
| 109 |
+
The function `get_score` uses QdrantClient to calculate the similarity score between a resume and a
|
| 110 |
+
job description.
|
| 111 |
+
|
| 112 |
+
Args:
|
| 113 |
+
resume_string: The `resume_string` parameter is a string containing the text of a resume. It
|
| 114 |
+
represents the content of a resume that you want to compare with a job description.
|
| 115 |
+
job_description_string: The `get_score` function you provided seems to be using a QdrantClient to
|
| 116 |
+
calculate the similarity score between a resume and a job description. The function takes in two
|
| 117 |
+
parameters: `resume_string` and `job_description_string`, where `resume_string` is the text content
|
| 118 |
+
of the resume and
|
| 119 |
+
|
| 120 |
+
Returns:
|
| 121 |
+
The function `get_score` returns the search result obtained by querying a QdrantClient with the
|
| 122 |
+
job description string against the resume string provided.
|
| 123 |
+
"""
|
| 124 |
logger.info("Started getting similarity score")
|
| 125 |
|
| 126 |
documents: List[str] = [resume_string]
|
|
|
|
| 133 |
)
|
| 134 |
|
| 135 |
search_result = client.query(
|
| 136 |
+
collection_name="demo_collection", query_text=job_description_string
|
|
|
|
| 137 |
)
|
| 138 |
logger.info("Finished getting similarity score")
|
| 139 |
return search_result
|
|
|
|
| 142 |
if __name__ == "__main__":
|
| 143 |
# To give your custom resume use this code
|
| 144 |
resume_dict = read_config(
|
| 145 |
+
READ_RESUME_FROM
|
| 146 |
+
+ "/Resume-alfred_pennyworth_pm.pdf83632b66-5cce-4322-a3c6-895ff7e3dd96.json"
|
| 147 |
+
)
|
| 148 |
job_dict = read_config(
|
| 149 |
+
READ_JOB_DESCRIPTION_FROM
|
| 150 |
+
+ "/JobDescription-job_desc_product_manager.pdf6763dc68-12ff-4b32-b652-ccee195de071.json"
|
| 151 |
+
)
|
| 152 |
resume_keywords = resume_dict["extracted_keywords"]
|
| 153 |
job_description_keywords = job_dict["extracted_keywords"]
|
| 154 |
|
| 155 |
+
resume_string = " ".join(resume_keywords)
|
| 156 |
+
jd_string = " ".join(job_description_keywords)
|
| 157 |
final_result = get_score(resume_string, jd_string)
|
| 158 |
for r in final_result:
|
| 159 |
print(r.score)
|
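For quick verification of the refactored module, a minimal sketch mirroring the __main__ block above (assumes qdrant-client is installed with its optional fastembed support, which the in-memory query_text API used here relies on; the input strings are arbitrary examples rather than real processed keywords):

from scripts.similarity.get_score import get_score

resume_string = "product manager roadmap stakeholder pricing beta launch"
jd_string = "product manager 10 years roadmap pricing mba"
for hit in get_score(resume_string, jd_string):
    print(hit.score)  # cosine-style similarity of the resume against the job description
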
scripts/similarity/get_similarity_score.py
CHANGED
|
@@ -7,49 +7,65 @@ import yaml
|
|
| 7 |
from qdrant_client import QdrantClient, models
|
| 8 |
from qdrant_client.http.models import Batch
|
| 9 |
|
| 10 |
-
|
| 11 |
-
filename='app_similarity_score.log',
|
| 12 |
-
filemode='w',
|
| 13 |
-
level=logging.INFO,
|
| 14 |
-
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
| 15 |
-
)
|
| 16 |
|
|
|
|
|
|
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
-
logger.setLevel(logging.DEBUG)
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
console_handler.setFormatter(formatter)
|
| 23 |
-
console_handler.setLevel(logging.DEBUG)
|
| 24 |
|
| 25 |
-
file_handler =
|
| 26 |
-
file_handler.setLevel(logging.DEBUG)
|
| 27 |
-
file_handler.setFormatter(formatter)
|
| 28 |
-
|
| 29 |
-
logger.addHandler(file_handler)
|
| 30 |
-
logger.addHandler(console_handler)
|
| 31 |
|
| 32 |
|
| 33 |
def find_path(folder_name):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
curr_dir = os.getcwd()
|
| 35 |
while True:
|
| 36 |
if folder_name in os.listdir(curr_dir):
|
| 37 |
return os.path.join(curr_dir, folder_name)
|
| 38 |
else:
|
| 39 |
parent_dir = os.path.dirname(curr_dir)
|
| 40 |
-
if parent_dir ==
|
| 41 |
break
|
| 42 |
curr_dir = parent_dir
|
| 43 |
raise ValueError(f"Folder '{folder_name}' not found.")
|
| 44 |
|
| 45 |
|
| 46 |
-
cwd = find_path(
|
| 47 |
-
READ_RESUME_FROM = os.path.join(cwd,
|
| 48 |
-
READ_JOB_DESCRIPTION_FROM = os.path.join(cwd,
|
| 49 |
config_path = os.path.join(cwd, "scripts", "similarity")
|
| 50 |
|
| 51 |
|
| 52 |
def read_config(filepath):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
try:
|
| 54 |
with open(filepath) as f:
|
| 55 |
config = yaml.safe_load(f)
|
|
@@ -57,28 +73,56 @@ def read_config(filepath):
|
|
| 57 |
except FileNotFoundError as e:
|
| 58 |
logger.error(f"Configuration file {filepath} not found: {e}")
|
| 59 |
except yaml.YAMLError as e:
|
| 60 |
-
logger.error(
|
|
|
|
|
|
|
| 61 |
except Exception as e:
|
| 62 |
logger.error(f"Error reading configuration file {filepath}: {e}")
|
| 63 |
return None
|
| 64 |
|
| 65 |
|
| 66 |
def read_doc(path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
with open(path) as f:
|
| 68 |
try:
|
| 69 |
data = json.load(f)
|
| 70 |
except Exception as e:
|
| 71 |
-
logger.error(f
|
| 72 |
data = {}
|
| 73 |
return data
|
| 74 |
|
| 75 |
|
|
|
|
| 76 |
class QdrantSearch:
|
| 77 |
def __init__(self, resumes, jd):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
config = read_config(config_path + "/config.yml")
|
| 79 |
-
self.cohere_key = config[
|
| 80 |
-
self.qdrant_key = config[
|
| 81 |
-
self.qdrant_url = config[
|
| 82 |
self.resumes = resumes
|
| 83 |
self.jd = jd
|
| 84 |
self.cohere = cohere.Client(self.cohere_key)
|
|
@@ -93,17 +137,30 @@ class QdrantSearch:
|
|
| 93 |
self.qdrant.recreate_collection(
|
| 94 |
collection_name=self.collection_name,
|
| 95 |
vectors_config=models.VectorParams(
|
| 96 |
-
size=vector_size,
|
| 97 |
-
|
| 98 |
-
)
|
| 99 |
)
|
| 100 |
|
| 101 |
self.logger = logging.getLogger(self.__class__.__name__)
|
| 102 |
|
| 103 |
-
self.logger.addHandler(
|
| 104 |
self.logger.addHandler(file_handler)
|
| 105 |
|
| 106 |
def get_embedding(self, text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
try:
|
| 108 |
embeddings = self.cohere.embed([text], "large").embeddings
|
| 109 |
return list(map(float, embeddings[0])), len(embeddings[0])
|
|
@@ -111,6 +168,10 @@ class QdrantSearch:
|
|
| 111 |
self.logger.error(f"Error getting embeddings: {e}", exc_info=True)
|
| 112 |
|
| 113 |
def update_qdrant(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
vectors = []
|
| 115 |
ids = []
|
| 116 |
for i, resume in enumerate(self.resumes):
|
|
@@ -123,33 +184,55 @@ class QdrantSearch:
|
|
| 123 |
points=Batch(
|
| 124 |
ids=ids,
|
| 125 |
vectors=vectors,
|
| 126 |
-
payloads=[{"text": resume} for resume in self.resumes]
|
| 127 |
-
|
| 128 |
-
)
|
| 129 |
)
|
| 130 |
except Exception as e:
|
| 131 |
-
self.logger.error(
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
def search(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
vector, _ = self.get_embedding(self.jd)
|
| 135 |
|
| 136 |
hits = self.qdrant.search(
|
| 137 |
-
collection_name=self.collection_name,
|
| 138 |
-
query_vector=vector,
|
| 139 |
-
limit=30
|
| 140 |
)
|
| 141 |
results = []
|
| 142 |
for hit in hits:
|
| 143 |
-
result = {
|
| 144 |
-
'text': str(hit.payload)[:30],
|
| 145 |
-
'score': hit.score
|
| 146 |
-
}
|
| 147 |
results.append(result)
|
| 148 |
|
| 149 |
return results
|
| 150 |
|
| 151 |
|
| 152 |
def get_similarity_score(resume_string, job_description_string):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
logger.info("Started getting similarity score")
|
| 154 |
qdrant_search = QdrantSearch([resume_string], job_description_string)
|
| 155 |
qdrant_search.update_qdrant()
|
|
@@ -161,15 +244,19 @@ def get_similarity_score(resume_string, job_description_string):
|
|
| 161 |
if __name__ == "__main__":
|
| 162 |
# To give your custom resume use this code
|
| 163 |
resume_dict = read_config(
|
| 164 |
-
READ_RESUME_FROM
|
|
|
|
|
|
|
| 165 |
job_dict = read_config(
|
| 166 |
-
READ_JOB_DESCRIPTION_FROM
|
| 167 |
-
|
|
|
|
|
|
|
| 168 |
resume_keywords = resume_dict["extracted_keywords"]
|
| 169 |
job_description_keywords = job_dict["extracted_keywords"]
|
| 170 |
|
| 171 |
-
resume_string =
|
| 172 |
-
jd_string =
|
| 173 |
final_result = get_similarity_score(resume_string, jd_string)
|
| 174 |
for r in final_result:
|
| 175 |
print(r)
|
|
|
|
| 7 |
from qdrant_client import QdrantClient, models
|
| 8 |
from qdrant_client.http.models import Batch
|
| 9 |
|
| 10 |
+
from scripts.utils.logger import get_handlers, init_logging_config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
init_logging_config(basic_log_level=logging.INFO)
|
| 13 |
+
# Get the logger
|
| 14 |
logger = logging.getLogger(__name__)
|
|
|
|
| 15 |
|
| 16 |
+
# Set the logging level
|
| 17 |
+
logger.setLevel(logging.INFO)
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
stderr_handler, file_handler = get_handlers()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def find_path(folder_name):
|
| 23 |
+
"""
|
| 24 |
+
Find the path of a folder with the given name in the current directory or its parent directories.
|
| 25 |
+
|
| 26 |
+
Args:
|
| 27 |
+
folder_name (str): The name of the folder to search for.
|
| 28 |
+
|
| 29 |
+
Returns:
|
| 30 |
+
str: The path of the folder if found.
|
| 31 |
+
|
| 32 |
+
Raises:
|
| 33 |
+
ValueError: If the folder with the given name is not found in the current directory or its parent directories.
|
| 34 |
+
"""
|
| 35 |
curr_dir = os.getcwd()
|
| 36 |
while True:
|
| 37 |
if folder_name in os.listdir(curr_dir):
|
| 38 |
return os.path.join(curr_dir, folder_name)
|
| 39 |
else:
|
| 40 |
parent_dir = os.path.dirname(curr_dir)
|
| 41 |
+
if parent_dir == "/":
|
| 42 |
break
|
| 43 |
curr_dir = parent_dir
|
| 44 |
raise ValueError(f"Folder '{folder_name}' not found.")
|
| 45 |
|
| 46 |
|
| 47 |
+
cwd = find_path("Resume-Matcher")
|
| 48 |
+
READ_RESUME_FROM = os.path.join(cwd, "Data", "Processed", "Resumes")
|
| 49 |
+
READ_JOB_DESCRIPTION_FROM = os.path.join(cwd, "Data", "Processed", "JobDescription")
|
| 50 |
config_path = os.path.join(cwd, "scripts", "similarity")
|
| 51 |
|
| 52 |
|
| 53 |
def read_config(filepath):
|
| 54 |
+
"""
|
| 55 |
+
Reads a configuration file in YAML format and returns the parsed configuration.
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
filepath (str): The path to the configuration file.
|
| 59 |
+
|
| 60 |
+
Returns:
|
| 61 |
+
dict: The parsed configuration as a dictionary.
|
| 62 |
+
|
| 63 |
+
Raises:
|
| 64 |
+
FileNotFoundError: If the configuration file is not found.
|
| 65 |
+
yaml.YAMLError: If there is an error parsing the YAML in the configuration file.
|
| 66 |
+
Exception: If there is an error reading the configuration file.
|
| 67 |
+
|
| 68 |
+
"""
|
| 69 |
try:
|
| 70 |
with open(filepath) as f:
|
| 71 |
config = yaml.safe_load(f)
|
|
|
|
| 73 |
except FileNotFoundError as e:
|
| 74 |
logger.error(f"Configuration file {filepath} not found: {e}")
|
| 75 |
except yaml.YAMLError as e:
|
| 76 |
+
logger.error(
|
| 77 |
+
f"Error parsing YAML in configuration file {filepath}: {e}", exc_info=True
|
| 78 |
+
)
|
| 79 |
except Exception as e:
|
| 80 |
logger.error(f"Error reading configuration file {filepath}: {e}")
|
| 81 |
return None
|
| 82 |
|
| 83 |
|
| 84 |
def read_doc(path):
|
| 85 |
+
"""
|
| 86 |
+
Read a JSON file and return its contents as a dictionary.
|
| 87 |
+
|
| 88 |
+
Args:
|
| 89 |
+
path (str): The path to the JSON file.
|
| 90 |
+
|
| 91 |
+
Returns:
|
| 92 |
+
dict: The contents of the JSON file as a dictionary.
|
| 93 |
+
|
| 94 |
+
Raises:
|
| 95 |
+
Exception: If there is an error reading the JSON file.
|
| 96 |
+
"""
|
| 97 |
with open(path) as f:
|
| 98 |
try:
|
| 99 |
data = json.load(f)
|
| 100 |
except Exception as e:
|
| 101 |
+
logger.error(f"Error reading JSON file: {e}")
|
| 102 |
data = {}
|
| 103 |
return data
|
| 104 |
|
| 105 |
|
| 106 |
+
# This class likely performs searches based on quadrants.
|
| 107 |
class QdrantSearch:
|
| 108 |
def __init__(self, resumes, jd):
|
| 109 |
+
"""
|
| 110 |
+
The function initializes various parameters and clients for processing resumes and job
|
| 111 |
+
descriptions.
|
| 112 |
+
|
| 113 |
+
Args:
|
| 114 |
+
resumes: The `resumes` parameter in the `__init__` method seems to be a list of resumes that
|
| 115 |
+
is passed to the class constructor. It is likely used within the class for some processing or
|
| 116 |
+
analysis related to resumes. If you have any specific questions or need further assistance with
|
| 117 |
+
this parameter or any
|
| 118 |
+
jd: The `jd` parameter in the `__init__` method seems to represent a job description. It is
|
| 119 |
+
likely used as input to compare against the resumes provided in the `resumes` parameter. The job
|
| 120 |
+
description is probably used for matching and analyzing against the resumes in the system.
|
| 121 |
+
"""
|
| 122 |
config = read_config(config_path + "/config.yml")
|
| 123 |
+
self.cohere_key = config["cohere"]["api_key"]
|
| 124 |
+
self.qdrant_key = config["qdrant"]["api_key"]
|
| 125 |
+
self.qdrant_url = config["qdrant"]["url"]
|
| 126 |
self.resumes = resumes
|
| 127 |
self.jd = jd
|
| 128 |
self.cohere = cohere.Client(self.cohere_key)
|
|
|
|
| 137 |
self.qdrant.recreate_collection(
|
| 138 |
collection_name=self.collection_name,
|
| 139 |
vectors_config=models.VectorParams(
|
| 140 |
+
size=vector_size, distance=models.Distance.COSINE
|
| 141 |
+
),
|
|
|
|
| 142 |
)
|
| 143 |
|
| 144 |
self.logger = logging.getLogger(self.__class__.__name__)
|
| 145 |
|
| 146 |
+
self.logger.addHandler(stderr_handler)
|
| 147 |
self.logger.addHandler(file_handler)
|
| 148 |
|
| 149 |
def get_embedding(self, text):
|
| 150 |
+
"""
|
| 151 |
+
The function `get_embedding` takes a text input, generates embeddings using the Cohere API, and
|
| 152 |
+
returns the embeddings as a list of floats along with the length of the embeddings.
|
| 153 |
+
|
| 154 |
+
Args:
|
| 155 |
+
text: The `text` parameter in the `get_embedding` function is a string that represents the
|
| 156 |
+
text for which you want to generate embeddings. This text will be passed to the Cohere API to
|
| 157 |
+
retrieve the embeddings for further processing.
|
| 158 |
+
|
| 159 |
+
Returns:
|
| 160 |
+
The `get_embedding` function returns a tuple containing two elements:
|
| 161 |
+
1. A list of floating-point numbers representing the embeddings of the input text.
|
| 162 |
+
2. The length of the embeddings list.
|
| 163 |
+
"""
|
| 164 |
try:
|
| 165 |
embeddings = self.cohere.embed([text], "large").embeddings
|
| 166 |
return list(map(float, embeddings[0])), len(embeddings[0])
|
|
|
|
| 168 |
self.logger.error(f"Error getting embeddings: {e}", exc_info=True)
|
| 169 |
|
| 170 |
def update_qdrant(self):
|
| 171 |
+
"""
|
| 172 |
+
This Python function updates vectors and corresponding metadata in a Qdrant collection based on
|
| 173 |
+
resumes.
|
| 174 |
+
"""
|
| 175 |
vectors = []
|
| 176 |
ids = []
|
| 177 |
for i, resume in enumerate(self.resumes):
|
|
|
|
| 184 |
points=Batch(
|
| 185 |
ids=ids,
|
| 186 |
vectors=vectors,
|
| 187 |
+
payloads=[{"text": resume} for resume in self.resumes],
|
| 188 |
+
),
|
|
|
|
| 189 |
)
|
| 190 |
except Exception as e:
|
| 191 |
+
self.logger.error(
|
| 192 |
+
f"Error upserting the vectors to the qdrant collection: {e}",
|
| 193 |
+
exc_info=True,
|
| 194 |
+
)
|
| 195 |
|
| 196 |
def search(self):
|
| 197 |
+
"""
|
| 198 |
+
The `search` function retrieves search results based on a query vector using a specified
|
| 199 |
+
collection in a search engine.
|
| 200 |
+
|
| 201 |
+
Returns:
|
| 202 |
+
A list of dictionaries containing the text and score of the search results.
|
| 203 |
+
"""
|
| 204 |
vector, _ = self.get_embedding(self.jd)
|
| 205 |
|
| 206 |
hits = self.qdrant.search(
|
| 207 |
+
collection_name=self.collection_name, query_vector=vector, limit=30
|
|
|
|
|
|
|
| 208 |
)
|
| 209 |
results = []
|
| 210 |
for hit in hits:
|
| 211 |
+
result = {"text": str(hit.payload)[:30], "score": hit.score}
|
|
|
|
|
|
|
|
|
|
| 212 |
results.append(result)
|
| 213 |
|
| 214 |
return results
|
| 215 |
|
| 216 |
|
| 217 |
def get_similarity_score(resume_string, job_description_string):
|
| 218 |
+
"""
|
| 219 |
+
This Python function `get_similarity_score` calculates the similarity score between a resume and a
|
| 220 |
+
job description using QdrantSearch.
|
| 221 |
+
|
| 222 |
+
Args:
|
| 223 |
+
resume_string: The `get_similarity_score` function seems to be using a `QdrantSearch` class to
|
| 224 |
+
calculate the similarity score between a resume and a job description. The `resume_string` parameter
|
| 225 |
+
likely contains the text content of a resume, while the `job_description_string` parameter contains
|
| 226 |
+
the text content of
|
| 227 |
+
job_description_string: The `job_description_string` parameter is a string containing the job
|
| 228 |
+
description for which you want to calculate the similarity score with a given resume. This
|
| 229 |
+
description typically includes details about the job requirements, responsibilities, qualifications,
|
| 230 |
+
and skills needed for the position. The function `get_similarity_score` takes this job description
|
| 231 |
+
|
| 232 |
+
Returns:
|
| 233 |
+
The function `get_similarity_score` returns the search result obtained from comparing a resume
|
| 234 |
+
string with a job description string using a QdrantSearch object.
|
| 235 |
+
"""
|
| 236 |
logger.info("Started getting similarity score")
|
| 237 |
qdrant_search = QdrantSearch([resume_string], job_description_string)
|
| 238 |
qdrant_search.update_qdrant()
|
|
|
|
| 244 |
if __name__ == "__main__":
|
| 245 |
# To give your custom resume use this code
|
| 246 |
resume_dict = read_config(
|
| 247 |
+
READ_RESUME_FROM
|
| 248 |
+
+ "/Resume-bruce_wayne_fullstack.pdf4783d115-e6fc-462e-ae4d-479152884b28.json"
|
| 249 |
+
)
|
| 250 |
job_dict = read_config(
|
| 251 |
+
READ_JOB_DESCRIPTION_FROM
|
| 252 |
+
+ "/JobDescription-job_desc_full_stack_engineer_pdf4de00846-a4fe-4fe5-a4d7"
|
| 253 |
+
"-2a8a1b9ad020.json"
|
| 254 |
+
)
|
| 255 |
resume_keywords = resume_dict["extracted_keywords"]
|
| 256 |
job_description_keywords = job_dict["extracted_keywords"]
|
| 257 |
|
| 258 |
+
resume_string = " ".join(resume_keywords)
|
| 259 |
+
jd_string = " ".join(job_description_keywords)
|
| 260 |
final_result = get_similarity_score(resume_string, jd_string)
|
| 261 |
for r in final_result:
|
| 262 |
print(r)
|
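A hedged usage sketch for the Cohere/Qdrant path (assumes scripts/similarity/config.yml supplies the cohere.api_key, qdrant.api_key and qdrant.url values read in QdrantSearch.__init__; the keyword strings below are placeholders, not real data):

from scripts.similarity.get_similarity_score import get_similarity_score

resume_keywords = "python fastapi react postgresql docker"
jd_keywords = "full stack engineer react python sql docker kubernetes"
for result in get_similarity_score(resume_keywords, jd_keywords):
    # Per QdrantSearch.search above, each entry carries a short "text" snippet and a "score".
    print(result)
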
scripts/utils/ReadFiles.py
CHANGED

@@ -1,5 +1,10 @@
 import os
 
+
 def get_filenames_from_dir(directory_path: str) -> list:
-    filenames = [
+    filenames = [
+        f
+        for f in os.listdir(directory_path)
+        if os.path.isfile(os.path.join(directory_path, f)) and f != ".DS_Store"
+    ]
     return filenames

scripts/utils/Similar.py
CHANGED

@@ -6,6 +6,6 @@ def match(resume, job_des):
     s = td.sorensen_dice.similarity(resume, job_des)
     c = td.cosine.similarity(resume, job_des)
     o = td.overlap.normalized_similarity(resume, job_des)
-    total = (j+s+c+o)/4
+    total = (j + s + c + o) / 4
     # total = (s+o)/2
-    return total*100
+    return total * 100

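For reference, match averages four textdistance similarities (the jaccard term j is computed just above the hunk shown) and scales the result to 0-100; a hedged sketch with made-up strings:

from scripts.utils.Similar import match

print(round(match("python sql airflow etl", "senior data engineer python sql spark"), 2))
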
scripts/utils/Utils.py
CHANGED

@@ -1,14 +1,15 @@
-from uuid import uuid4
 import re
+from uuid import uuid4
+
 import spacy
 
 # Load the English model
-nlp = spacy.load(
+nlp = spacy.load("en_core_web_md")
 
 REGEX_PATTERNS = {
-
-
-
+    "email_pattern": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
+    "phone_pattern": r"\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}",
+    "link_pattern": r"\b(?:https?://|www\.)\S+\b",
 }
 
 
@@ -38,7 +39,7 @@ class TextCleaner:
             str: The cleaned text.
         """
         for pattern in REGEX_PATTERNS:
-            text = re.sub(REGEX_PATTERNS[pattern],
+            text = re.sub(REGEX_PATTERNS[pattern], "", text)
         return text
 
     def clean_text(text):
@@ -54,8 +55,8 @@ class TextCleaner:
         text = TextCleaner.remove_emails_links(text)
         doc = nlp(text)
         for token in doc:
-            if token.pos_ ==
-                text = text.replace(token.text,
+            if token.pos_ == "PUNCT":
+                text = text.replace(token.text, "")
         return str(text)
 
     def remove_stopwords(text):
@@ -71,7 +72,7 @@ class TextCleaner:
         doc = nlp(text)
         for token in doc:
             if token.is_stop:
-                text = text.replace(token.text,
+                text = text.replace(token.text, "")
         return text
 
 
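A quick check of the regex scrubbing defined above (the sample string is made up):

from scripts.utils.Utils import TextCleaner

sample = "Reach me at jane.doe@example.com or (555) 123-4567; portfolio: https://example.com/jane"
print(TextCleaner.remove_emails_links(sample))  # email, phone number and link stripped out
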
scripts/utils/__init__.py
CHANGED

@@ -1,3 +1,3 @@
 from .logger import init_logging_config
+from .ReadFiles import get_filenames_from_dir
 from .Utils import TextCleaner
-from .ReadFiles import get_filenames_from_dir

scripts/utils/logger.py
CHANGED
|
@@ -1,41 +1,145 @@
|
|
| 1 |
import logging
|
| 2 |
|
| 3 |
|
| 4 |
-
def
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
green = "\x1b[32;10m" if not file else ""
|
| 11 |
-
red = "\x1b[31;10m" if not file else ""
|
| 12 |
-
bold_red = "\x1b[31;1m" if not file else ""
|
| 13 |
-
reset = "\x1b[0m" if not file else ""
|
| 14 |
-
log = "%(asctime)s (%(filename)s:%(lineno)d) - %(levelname)s: "
|
| 15 |
-
msg = reset + "%(message)s"
|
| 16 |
-
|
| 17 |
-
self.FORMATS = {
|
| 18 |
-
logging.DEBUG: blue + log + msg,
|
| 19 |
-
logging.INFO: green + log + msg,
|
| 20 |
-
logging.WARNING: yellow + log + msg,
|
| 21 |
-
logging.ERROR: red + log + msg,
|
| 22 |
-
logging.CRITICAL: bold_red + log + msg,
|
| 23 |
-
}
|
| 24 |
-
|
| 25 |
-
def format(self, record):
|
| 26 |
-
log_fmt = self.FORMATS.get(record.levelno)
|
| 27 |
-
formatter = logging.Formatter(log_fmt)
|
| 28 |
-
return formatter.format(record)
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
stderr_handler = logging.StreamHandler()
|
| 34 |
-
stderr_handler.setLevel(
|
| 35 |
stderr_handler.setFormatter(CustomFormatter())
|
| 36 |
-
logger.addHandler(stderr_handler)
|
| 37 |
|
| 38 |
-
|
| 39 |
-
file_handler
|
|
|
|
| 40 |
file_handler.setFormatter(CustomFormatter(True))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
logger.addHandler(file_handler)
|
|
|
|
| 1 |
import logging
|
| 2 |
|
| 3 |
|
| 4 |
+
def get_handlers(
|
| 5 |
+
filename="app.log", mode="w", file_level=logging.DEBUG, stderr_level=logging.DEBUG
|
| 6 |
+
):
|
| 7 |
+
"""
|
| 8 |
+
The function `get_handlers` returns a stream handler and a file handler with specified logging
|
| 9 |
+
levels and formatters.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
Args:
|
| 12 |
+
filename: The `filename` parameter is the name of the log file where the log messages will be
|
| 13 |
+
written. In this case, the default filename is "app.log". Defaults to app.log
|
| 14 |
+
mode: The `mode` parameter in the `get_handlers` function specifies the mode in which the file
|
| 15 |
+
should be opened. In this case, the default mode is set to "w", which stands for write mode. This
|
| 16 |
+
means that if the file already exists, it will be truncated (i.e., its. Defaults to w
|
| 17 |
+
file_level: The `file_level` parameter in the `get_handlers` function is used to specify the
|
| 18 |
+
logging level for the file handler. In this case, it is set to `logging.DEBUG`, which means that the
|
| 19 |
+
file handler will log all messages at the DEBUG level and above.
|
| 20 |
+
stderr_level: The `stderr_level` parameter in the `get_handlers` function is used to specify the
|
| 21 |
+
logging level for the StreamHandler that outputs log messages to the standard error stream (stderr).
|
| 22 |
+
This level determines which log messages will be processed and output by the StreamHandler.
|
| 23 |
|
| 24 |
+
Returns:
|
| 25 |
+
The `get_handlers` function returns two logging handlers: `stderr_handler` which is a
|
| 26 |
+
StreamHandler for logging to stderr, and `file_handler` which is a FileHandler for logging to a file
|
| 27 |
+
specified by the `filename` parameter.
|
| 28 |
+
"""
|
| 29 |
+
# Stream handler
|
| 30 |
stderr_handler = logging.StreamHandler()
|
| 31 |
+
stderr_handler.setLevel(stderr_level)
|
| 32 |
stderr_handler.setFormatter(CustomFormatter())
|
|
|
|
| 33 |
|
| 34 |
+
# File handler
|
| 35 |
+
file_handler = logging.FileHandler(filename, mode=mode)
|
| 36 |
+
file_handler.setLevel(file_level)
|
| 37 |
file_handler.setFormatter(CustomFormatter(True))
|
| 38 |
+
|
| 39 |
+
# TODO: Add RotatingFileHandler
|
| 40 |
+
|
| 41 |
+
return stderr_handler, file_handler
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class CustomFormatter(logging.Formatter):
|
| 45 |
+
"""
|
| 46 |
+
A custom log formatter that adds color to log messages based on the log level.
|
| 47 |
+
|
| 48 |
+
Args:
|
| 49 |
+
file (bool): Indicates whether the log is being written to a file. Default is False.
|
| 50 |
+
|
| 51 |
+
Attributes:
|
| 52 |
+
FORMATS (dict): A dictionary mapping log levels to colorized log message formats.
|
| 53 |
+
|
| 54 |
+
Methods:
|
| 55 |
+
format(record): Formats the log record with the appropriate colorized log message format.
|
| 56 |
+
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
def __init__(self, file=False):
|
| 60 |
+
"""
|
| 61 |
+
This function initializes logging formats with different colors and styles based on the log
|
| 62 |
+
level.
|
| 63 |
+
|
| 64 |
+
Args:
|
| 65 |
+
file: The `file` parameter in the `__init__` method is a boolean flag that determines whether
|
| 66 |
+
the logging output should be colored or not. If `file` is `True`, the colors will not be applied
|
| 67 |
+
to the log messages. Defaults to False
|
| 68 |
+
"""
|
| 69 |
+
super().__init__()
|
| 70 |
+
yellow = "\x1b[36;10m" if not file else ""
|
| 71 |
+
blue = "\x1b[35;10m" if not file else ""
|
| 72 |
+
green = "\x1b[32;10m" if not file else ""
|
| 73 |
+
red = "\x1b[31;10m" if not file else ""
|
| 74 |
+
bold_red = "\x1b[31;1m" if not file else ""
|
| 75 |
+
reset = "\x1b[0m" if not file else ""
|
| 76 |
+
log = "%(asctime)s (%(filename)s:%(lineno)d) - %(levelname)s: "
|
| 77 |
+
msg = reset + "%(message)s"
|
| 78 |
+
|
| 79 |
+
self.FORMATS = {
|
| 80 |
+
logging.DEBUG: blue + log + msg,
|
| 81 |
+
logging.INFO: green + log + msg,
|
| 82 |
+
logging.WARNING: yellow + log + msg,
|
| 83 |
+
logging.ERROR: red + log + msg,
|
| 84 |
+
logging.CRITICAL: bold_red + log + msg,
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
def format(self, record):
|
| 88 |
+
"""
|
| 89 |
+
Formats the log record with the appropriate colorized log message format.
|
| 90 |
+
|
| 91 |
+
Args:
|
| 92 |
+
record (LogRecord): The log record to be formatted.
|
| 93 |
+
|
| 94 |
+
Returns:
|
| 95 |
+
str: The formatted log message.
|
| 96 |
+
|
| 97 |
+
"""
|
| 98 |
+
log_fmt = self.FORMATS.get(record.levelno)
|
| 99 |
+
formatter = logging.Formatter(log_fmt)
|
| 100 |
+
return formatter.format(record)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def init_logging_config(
|
| 104 |
+
basic_log_level=logging.INFO,
|
| 105 |
+
filename="app.log",
|
| 106 |
+
mode="w",
|
| 107 |
+
file_level=logging.DEBUG,
|
| 108 |
+
stderr_level=logging.DEBUG,
|
| 109 |
+
):
|
| 110 |
+
"""
|
| 111 |
+
The function `init_logging_config` initializes logging configuration in Python by setting basic log
|
| 112 |
+
level, configuring handlers, and adding them to the logger.
|
| 113 |
+
|
| 114 |
+
Args:
|
| 115 |
+
basic_log_level: The `basic_log_level` parameter is used to set the logging level for the root
|
| 116 |
+
logger. In this function, it is set to `logging.INFO` by default, which means that log messages with
|
| 117 |
+
severity level INFO or higher will be processed.
|
| 118 |
+
filename: The `filename` parameter is a string that specifies the name of the log file where the
|
| 119 |
+
logs will be written. In the `init_logging_config` function you provided, the default value for
|
| 120 |
+
`filename` is "app.log". This means that if no filename is provided when calling the function, logs.
|
| 121 |
+
Defaults to app.log
|
| 122 |
+
mode: The `mode` parameter in the `init_logging_config` function specifies the mode in which the
|
| 123 |
+
log file will be opened. In this case, the default value is "w" which stands for write mode. This
|
| 124 |
+
means that the log file will be opened for writing, and if the file already exists. Defaults to w
|
| 125 |
+
file_level: The `file_level` parameter in the `init_logging_config` function is used to specify
|
| 126 |
+
the logging level for the file handler. This determines the severity level of log messages that will
|
| 127 |
+
be written to the log file specified by the `filename` parameter. In this case, the default value
|
| 128 |
+
for `file
|
| 129 |
+
stderr_level: The `stderr_level` parameter in the `init_logging_config` function is used to
|
| 130 |
+
specify the logging level for the stderr (standard error) handler. This handler is responsible for
|
| 131 |
+
directing log messages to the standard error stream. The logging level determines which severity of
|
| 132 |
+
log messages will be output to the stderr.
|
| 133 |
+
"""
|
| 134 |
+
|
| 135 |
+
logger = logging.getLogger()
|
| 136 |
+
logger.setLevel(basic_log_level)
|
| 137 |
+
|
| 138 |
+
# Get the handlers
|
| 139 |
+
stderr_handler, file_handler = get_handlers(
|
| 140 |
+
file_level=file_level, stderr_level=stderr_level, filename=filename, mode=mode
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
# Add the handlers
|
| 144 |
+
logger.addHandler(stderr_handler)
|
| 145 |
logger.addHandler(file_handler)
|
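The way this commit consumes the new helper (see get_score.py and get_similarity_score.py above): configure the root logger once, then grab per-module loggers as usual.

import logging

from scripts.utils.logger import init_logging_config

init_logging_config(basic_log_level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info("messages now go to stderr and to app.log")
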
streamlit_app.py
CHANGED
|
@@ -12,18 +12,25 @@ from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge

from scripts.similarity.get_score import *
from scripts.utils import get_filenames_from_dir
from scripts.utils.logger import init_logging_config

# Set page configuration
st.set_page_config(
    page_title="Resume Matcher",
    page_icon="Assets/img/favicon.ico",
    initial_sidebar_state="auto",
)

init_logging_config()
cwd = find_path("Resume-Matcher")
config_path = os.path.join(cwd, "scripts", "similarity")

try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt")

parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3

@@ -55,8 +62,13 @@ def create_star_graph(nodes_and_weights, title):
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color="#888"),
        hoverinfo="none",
        mode="lines",
    )

    # Create node trace
    node_x = []

@@ -66,10 +78,26 @@ def create_star_graph(nodes_and_weights, title):
        node_x.append(x)
        node_y.append(y)

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers",
        hoverinfo="text",
        marker=dict(
            showscale=True,
            colorscale="Rainbow",
            reversescale=True,
            color=[],
            size=10,
            colorbar=dict(
                thickness=15,
                title="Node Connections",
                xanchor="left",
                titleside="right",
            ),
            line_width=2,
        ),
    )

    # Color node points by number of connections
    node_adjacencies = []

@@ -77,24 +105,32 @@ def create_star_graph(nodes_and_weights, title):
    for node in G.nodes():
        adjacencies = list(G.adj[node])  # changes here
        node_adjacencies.append(len(adjacencies))
        node_text.append(f"{node}<br># of connections: {len(adjacencies)}")

    node_trace.marker.color = node_adjacencies
    node_trace.text = node_text

    # Create the figure
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title=title,
            titlefont_size=16,
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )

    # Show the figure
    st.plotly_chart(fig)


def create_annotated_text(
    input_string: str, word_list: List[str], annotation: str, color_code: str
):
    # Tokenize the input string
    tokens = nltk.word_tokenize(input_string)

@@ -128,18 +164,26 @@ def tokenize_string(input_string):
# Display the main title and subheaders
st.title(":blue[Resume Matcher]")
with st.sidebar:
    st.image("Assets/img/header_image.png")
    st.subheader(
        "Free and Open Source ATS to help your resume pass the screening stage."
    )
    st.markdown(
        "Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)"
    )

    st.markdown(
        "Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)"
    )

    badge(type="github", name="srbhr/Resume-Matcher")
    st.markdown("For updates follow me on Twitter.")
    badge(type="twitter", name="_srbhr_")
    st.markdown(
        "If you like the project and would like to further help in development please consider 👇"
    )
    badge(type="buymeacoffee", name="srbhr")

    st.divider()

@@ -148,9 +192,10 @@ avs.add_vertical_space(1)
resume_names = get_filenames_from_dir("Data/Processed/Resumes")


st.markdown(
    f"##### There are {len(resume_names)} resumes present. Please select one from the menu below:"
)
output = st.selectbox(f"", resume_names)


avs.add_vertical_space(5)

@@ -161,7 +206,8 @@ selected_file = read_json("Data/Processed/Resumes/" + output)
avs.add_vertical_space(2)
st.markdown("#### Parsed Resume Data")
st.caption(
    "This text is parsed from your resume. This is how it'll look like after getting parsed by an ATS."
)
st.caption("Utilize this to understand how to make your resume ATS friendly.")
avs.add_vertical_space(3)
# st.json(selected_file)

@@ -170,38 +216,53 @@ st.write(selected_file["clean_data"])
avs.add_vertical_space(3)
st.write("Now let's take a look at the extracted keywords from the resume.")

annotated_text(
    create_annotated_text(
        selected_file["clean_data"],
        selected_file["extracted_keywords"],
        "KW",
        "#0B666A",
    )
)

avs.add_vertical_space(5)
st.write("Now let's take a look at the extracted entities from the resume.")

# Call the function with your data
create_star_graph(selected_file["keyterms"], "Entities from Resume")

df2 = pd.DataFrame(selected_file["keyterms"], columns=["keyword", "value"])

# Create the dictionary
keyword_dict = {}
for keyword, value in selected_file["keyterms"]:
    keyword_dict[keyword] = value * 100

fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Keyword", "Value"], font=dict(size=12), fill_color="#070A52"
            ),
            cells=dict(
                values=[list(keyword_dict.keys()), list(keyword_dict.values())],
                line_color="darkslategray",
                fill_color="#6DA9E4",
            ),
        )
    ]
)
st.plotly_chart(fig)

st.divider()

fig = px.treemap(
    df2,
    path=["keyword"],
    values="value",
    color_continuous_scale="Rainbow",
    title="Key Terms/Topics Extracted from your Resume",
)
st.write(fig)

avs.add_vertical_space(5)

@@ -209,65 +270,88 @@ avs.add_vertical_space(5)
job_descriptions = get_filenames_from_dir("Data/Processed/JobDescription")


st.markdown(
    f"##### There are {len(job_descriptions)} job descriptions present. Please select one from the menu below:"
)
output = st.selectbox("", job_descriptions)


avs.add_vertical_space(5)

selected_jd = read_json("Data/Processed/JobDescription/" + output)

avs.add_vertical_space(2)
st.markdown("#### Job Description")
st.caption(
    "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
)
avs.add_vertical_space(3)
# st.json(selected_file)
st.write(selected_jd["clean_data"])

st.markdown("#### Common Words between Job Description and Resumes Highlighted.")

annotated_text(
    create_annotated_text(
        selected_file["clean_data"], selected_jd["extracted_keywords"], "JD", "#F24C3D"
    )
)

st.write("Now let's take a look at the extracted entities from the job description.")

# Call the function with your data
create_star_graph(selected_jd["keyterms"], "Entities from Job Description")

df2 = pd.DataFrame(selected_jd["keyterms"], columns=["keyword", "value"])

# Create the dictionary
keyword_dict = {}
for keyword, value in selected_jd["keyterms"]:
    keyword_dict[keyword] = value * 100

fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=["Keyword", "Value"], font=dict(size=12), fill_color="#070A52"
            ),
            cells=dict(
                values=[list(keyword_dict.keys()), list(keyword_dict.values())],
                line_color="darkslategray",
                fill_color="#6DA9E4",
            ),
        )
    ]
)
st.plotly_chart(fig)

st.divider()

fig = px.treemap(
    df2,
    path=["keyword"],
    values="value",
    color_continuous_scale="Rainbow",
    title="Key Terms/Topics Extracted from the selected Job Description",
)
st.write(fig)

avs.add_vertical_space(3)

resume_string = " ".join(selected_file["extracted_keywords"])
jd_string = " ".join(selected_jd["extracted_keywords"])
result = get_score(resume_string, jd_string)
similarity_score = round(result[0].score * 100, 2)
score_color = "green"
if similarity_score < 60:
    score_color = "red"
elif 60 <= similarity_score < 75:
    score_color = "orange"
st.markdown(
    f"Similarity Score obtained for the resume and job description is "
    f'<span style="color:{score_color};font-size:24px; font-weight:Bold">{similarity_score}</span>',
    unsafe_allow_html=True,
)

# Go back to top
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")
|
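The similarity display above only assumes that get_score returns a sequence whose first element exposes a score attribute in the 0-1 range; the concrete result type lives in scripts/similarity/get_score.py and is not shown in this diff. A small sketch of the same thresholding, factored into a helper purely for clarity — the 60 and 75 cut-offs are the values used above, everything else is illustrative:

def format_similarity(result):
    # result[0].score is assumed to be a float between 0 and 1
    similarity_score = round(result[0].score * 100, 2)
    if similarity_score < 60:
        color = "red"
    elif similarity_score < 75:
        color = "orange"
    else:
        color = "green"
    return similarity_score, color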
streamlit_interactive.py
CHANGED
|
@@ -13,25 +13,29 @@ from annotated_text import annotated_text, parameters
@@ -92,8 +96,13 @@ def create_star_graph(nodes_and_weights, title):
@@ -103,10 +112,26 @@ def create_star_graph(nodes_and_weights, title):
@@ -114,25 +139,33 @@ def create_star_graph(nodes_and_weights, title):
@@ -210,16 +243,24 @@ if "jobDescriptionUploaded" not in st.session_state.keys():
@@ -231,13 +272,17 @@ with st.container():
@@ -245,170 +290,247 @@ with st.container():
|
|
|
| 13 |
from streamlit_extras import add_vertical_space as avs
|
| 14 |
from streamlit_extras.badges import badge
|
| 15 |
|
| 16 |
+
from scripts import JobDescriptionProcessor, ResumeProcessor
|
| 17 |
+
from scripts.parsers import ParseJobDesc, ParseResume
|
| 18 |
from scripts.ReadPdf import read_single_pdf
|
| 19 |
+
from scripts.similarity.get_score import *
|
|
|
|
|
|
|
| 20 |
from scripts.utils import get_filenames_from_dir
|
| 21 |
|
| 22 |
# Set page configuration
|
| 23 |
+
st.set_page_config(
|
| 24 |
+
page_title="Resume Matcher",
|
| 25 |
+
page_icon="Assets/img/favicon.ico",
|
| 26 |
+
initial_sidebar_state="auto",
|
| 27 |
+
layout="wide",
|
| 28 |
+
)
|
| 29 |
|
| 30 |
# Find the current working directory and configuration path
|
| 31 |
+
cwd = find_path("Resume-Matcher")
|
| 32 |
config_path = os.path.join(cwd, "scripts", "similarity")
|
| 33 |
|
| 34 |
# Check if NLTK punkt data is available, if not, download it
|
| 35 |
try:
|
| 36 |
+
nltk.data.find("tokenizers/punkt")
|
| 37 |
except LookupError:
|
| 38 |
+
nltk.download("punkt")
|
| 39 |
|
| 40 |
# Set some visualization parameters using the annotated_text library
|
| 41 |
parameters.SHOW_LABEL_SEPARATOR = False
|
|
|
|
| 96 |
edge_x.extend([x0, x1, None])
|
| 97 |
edge_y.extend([y0, y1, None])
|
| 98 |
|
| 99 |
+
edge_trace = go.Scatter(
|
| 100 |
+
x=edge_x,
|
| 101 |
+
y=edge_y,
|
| 102 |
+
line=dict(width=0.5, color="#888"),
|
| 103 |
+
hoverinfo="none",
|
| 104 |
+
mode="lines",
|
| 105 |
+
)
|
| 106 |
|
| 107 |
# Create node trace
|
| 108 |
node_x = []
|
|
|
|
| 112 |
node_x.append(x)
|
| 113 |
node_y.append(y)
|
| 114 |
|
| 115 |
+
node_trace = go.Scatter(
|
| 116 |
+
x=node_x,
|
| 117 |
+
y=node_y,
|
| 118 |
+
mode="markers",
|
| 119 |
+
hoverinfo="text",
|
| 120 |
+
marker=dict(
|
| 121 |
+
showscale=True,
|
| 122 |
+
colorscale="Rainbow",
|
| 123 |
+
reversescale=True,
|
| 124 |
+
color=[],
|
| 125 |
+
size=10,
|
| 126 |
+
colorbar=dict(
|
| 127 |
+
thickness=15,
|
| 128 |
+
title="Node Connections",
|
| 129 |
+
xanchor="left",
|
| 130 |
+
titleside="right",
|
| 131 |
+
),
|
| 132 |
+
line_width=2,
|
| 133 |
+
),
|
| 134 |
+
)
|
| 135 |
|
| 136 |
# Color node points by number of connections
|
| 137 |
node_adjacencies = []
|
|
|
|
| 139 |
for node in graph.nodes():
|
| 140 |
adjacencies = list(graph.adj[node]) # Changes here
|
| 141 |
node_adjacencies.append(len(adjacencies))
|
| 142 |
+
node_text.append(f"{node}<br># of connections: {len(adjacencies)}")
|
| 143 |
|
| 144 |
node_trace.marker.color = node_adjacencies
|
| 145 |
node_trace.text = node_text
|
| 146 |
|
| 147 |
# Create the figure
|
| 148 |
+
figure = go.Figure(
|
| 149 |
+
data=[edge_trace, node_trace],
|
| 150 |
+
layout=go.Layout(
|
| 151 |
+
title=title,
|
| 152 |
+
titlefont=dict(size=16),
|
| 153 |
+
showlegend=False,
|
| 154 |
+
hovermode="closest",
|
| 155 |
+
margin=dict(b=20, l=5, r=5, t=40),
|
| 156 |
+
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
| 157 |
+
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
| 158 |
+
),
|
| 159 |
+
)
|
| 160 |
|
| 161 |
# Show the figure
|
| 162 |
st.plotly_chart(figure, use_container_width=True)
|
| 163 |
|
| 164 |
|
| 165 |
# Function to create annotated text with highlighting
|
| 166 |
+
def create_annotated_text(
|
| 167 |
+
input_string: str, word_list: List[str], annotation: str, color_code: str
|
| 168 |
+
):
|
| 169 |
"""
|
| 170 |
Create annotated text with highlighted keywords.
|
| 171 |
|
|
|
|
| 243 |
update_session_state("jobDescriptionPath", "")
|
| 244 |
|
| 245 |
# Display the main title and sub-headers
|
| 246 |
+
st.title(":blue[Resume Matcher]")
|
| 247 |
with st.sidebar:
|
| 248 |
+
st.image("Assets/img/header_image.png")
|
| 249 |
+
st.subheader(
|
| 250 |
+
"Free and Open Source ATS to help your resume pass the screening stage."
|
| 251 |
+
)
|
| 252 |
+
st.markdown(
|
| 253 |
+
"Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)"
|
| 254 |
+
)
|
| 255 |
+
st.markdown(
|
| 256 |
+
"Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)"
|
| 257 |
+
)
|
| 258 |
badge(type="github", name="srbhr/Resume-Matcher")
|
| 259 |
+
st.markdown("For updates follow me on Twitter.")
|
| 260 |
badge(type="twitter", name="_srbhr_")
|
| 261 |
+
st.markdown(
|
| 262 |
+
"If you like the project and would like to further help in development please consider 👇"
|
| 263 |
+
)
|
| 264 |
badge(type="buymeacoffee", name="srbhr")
|
| 265 |
|
| 266 |
st.divider()
|
|
|
|
| 272 |
uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf")
|
| 273 |
if uploaded_Resume is not None:
|
| 274 |
if st.session_state["resumeUploaded"] == "Pending":
|
| 275 |
+
save_path_resume = os.path.join(
|
| 276 |
+
cwd, "Data", "Resumes", uploaded_Resume.name
|
| 277 |
+
)
|
| 278 |
|
| 279 |
+
with open(save_path_resume, mode="wb") as w:
|
| 280 |
w.write(uploaded_Resume.getvalue())
|
| 281 |
|
| 282 |
if os.path.exists(save_path_resume):
|
| 283 |
+
st.toast(
|
| 284 |
+
f"File {uploaded_Resume.name} is successfully saved!", icon="✔️"
|
| 285 |
+
)
|
| 286 |
update_session_state("resumeUploaded", "Uploaded")
|
| 287 |
update_session_state("resumePath", save_path_resume)
|
| 288 |
else:
|
|
|
|
| 290 |
update_session_state("resumePath", "")
|
| 291 |
|
| 292 |
with jobDescriptionCol:
|
| 293 |
+
uploaded_JobDescription = st.file_uploader(
|
| 294 |
+
"Choose a Job Description", type="pdf"
|
| 295 |
+
)
|
| 296 |
if uploaded_JobDescription is not None:
|
| 297 |
if st.session_state["jobDescriptionUploaded"] == "Pending":
|
| 298 |
+
save_path_jobDescription = os.path.join(
|
| 299 |
+
cwd, "Data", "JobDescription", uploaded_JobDescription.name
|
| 300 |
+
)
|
| 301 |
|
| 302 |
+
with open(save_path_jobDescription, mode="wb") as w:
|
| 303 |
w.write(uploaded_JobDescription.getvalue())
|
| 304 |
|
| 305 |
if os.path.exists(save_path_jobDescription):
|
| 306 |
+
st.toast(
|
| 307 |
+
f"File {uploaded_JobDescription.name} is successfully saved!",
|
| 308 |
+
icon="✔️",
|
| 309 |
+
)
|
| 310 |
update_session_state("jobDescriptionUploaded", "Uploaded")
|
| 311 |
update_session_state("jobDescriptionPath", save_path_jobDescription)
|
| 312 |
else:
|
| 313 |
update_session_state("jobDescriptionUploaded", "Pending")
|
| 314 |
update_session_state("jobDescriptionPath", "")
|
| 315 |
|
| 316 |
+
with st.spinner("Please wait..."):
|
| 317 |
+
if (
|
| 318 |
+
uploaded_Resume is not None
|
| 319 |
+
and st.session_state["jobDescriptionUploaded"] == "Uploaded"
|
| 320 |
+
and uploaded_JobDescription is not None
|
| 321 |
+
and st.session_state["jobDescriptionUploaded"] == "Uploaded"
|
| 322 |
+
):
|
| 323 |
|
| 324 |
resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
|
| 325 |
+
jobDescriptionProcessor = ParseJobDesc(
|
| 326 |
+
read_single_pdf(st.session_state["jobDescriptionPath"])
|
| 327 |
+
)
|
| 328 |
|
| 329 |
# Resume / JD output
|
| 330 |
selected_file = resumeProcessor.get_JSON()
|
| 331 |
selected_jd = jobDescriptionProcessor.get_JSON()
|
| 332 |
|
| 333 |
# Add containers for each row to avoid overlap
|
| 334 |
+
|
| 335 |
+
# Parsed data
|
| 336 |
with st.container():
|
| 337 |
resumeCol, jobDescriptionCol = st.columns(2)
|
| 338 |
with resumeCol:
|
| 339 |
with st.expander("Parsed Resume Data"):
|
| 340 |
st.caption(
|
| 341 |
"This text is parsed from your resume. This is how it'll look like after getting parsed by an "
|
| 342 |
+
"ATS."
|
| 343 |
+
)
|
| 344 |
+
st.caption(
|
| 345 |
+
"Utilize this to understand how to make your resume ATS friendly."
|
| 346 |
+
)
|
| 347 |
avs.add_vertical_space(3)
|
| 348 |
st.write(selected_file["clean_data"])
|
| 349 |
|
| 350 |
with jobDescriptionCol:
|
| 351 |
with st.expander("Parsed Job Description"):
|
| 352 |
st.caption(
|
| 353 |
+
"Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
|
| 354 |
+
)
|
| 355 |
avs.add_vertical_space(3)
|
| 356 |
st.write(selected_jd["clean_data"])
|
| 357 |
|
| 358 |
+
# Extracted keywords
|
| 359 |
with st.container():
|
| 360 |
resumeCol, jobDescriptionCol = st.columns(2)
|
| 361 |
with resumeCol:
|
| 362 |
with st.expander("Extracted Keywords"):
|
| 363 |
+
st.write(
|
| 364 |
+
"Now let's take a look at the extracted keywords from the resume."
|
| 365 |
+
)
|
| 366 |
+
annotated_text(
|
| 367 |
+
create_annotated_text(
|
| 368 |
+
selected_file["clean_data"],
|
| 369 |
+
selected_file["extracted_keywords"],
|
| 370 |
+
"KW",
|
| 371 |
+
"#0B666A",
|
| 372 |
+
)
|
| 373 |
+
)
|
| 374 |
with jobDescriptionCol:
|
| 375 |
with st.expander("Extracted Keywords"):
|
| 376 |
+
st.write(
|
| 377 |
+
"Now let's take a look at the extracted keywords from the job description."
|
| 378 |
+
)
|
| 379 |
+
annotated_text(
|
| 380 |
+
create_annotated_text(
|
| 381 |
+
selected_jd["clean_data"],
|
| 382 |
+
selected_jd["extracted_keywords"],
|
| 383 |
+
"KW",
|
| 384 |
+
"#0B666A",
|
| 385 |
+
)
|
| 386 |
+
)
|
| 387 |
+
|
| 388 |
+
# Star graph visualization
|
| 389 |
with st.container():
|
| 390 |
resumeCol, jobDescriptionCol = st.columns(2)
|
| 391 |
with resumeCol:
|
| 392 |
with st.expander("Extracted Entities"):
|
| 393 |
+
st.write(
|
| 394 |
+
"Now let's take a look at the extracted entities from the resume."
|
| 395 |
+
)
|
| 396 |
|
| 397 |
# Call the function with your data
|
| 398 |
+
create_star_graph(selected_file["keyterms"], "Entities from Resume")
|
| 399 |
with jobDescriptionCol:
|
| 400 |
with st.expander("Extracted Entities"):
|
| 401 |
+
st.write(
|
| 402 |
+
"Now let's take a look at the extracted entities from the job description."
|
| 403 |
+
)
|
| 404 |
|
| 405 |
# Call the function with your data
|
| 406 |
+
create_star_graph(
|
| 407 |
+
selected_jd["keyterms"], "Entities from Job Description"
|
| 408 |
+
)
|
| 409 |
|
| 410 |
+
# Keywords and values
|
| 411 |
with st.container():
|
| 412 |
resumeCol, jobDescriptionCol = st.columns(2)
|
| 413 |
with resumeCol:
|
| 414 |
with st.expander("Keywords & Values"):
|
| 415 |
+
df1 = pd.DataFrame(
|
| 416 |
+
selected_file["keyterms"], columns=["keyword", "value"]
|
| 417 |
+
)
|
| 418 |
|
| 419 |
# Create the dictionary
|
| 420 |
keyword_dict = {}
|
| 421 |
+
for keyword, value in selected_file["keyterms"]:
|
| 422 |
keyword_dict[keyword] = value * 100
|
| 423 |
|
| 424 |
+
fig = go.Figure(
|
| 425 |
+
data=[
|
| 426 |
+
go.Table(
|
| 427 |
+
header=dict(
|
| 428 |
+
values=["Keyword", "Value"],
|
| 429 |
+
font=dict(size=12, color="white"),
|
| 430 |
+
fill_color="#1d2078",
|
| 431 |
+
),
|
| 432 |
+
cells=dict(
|
| 433 |
+
values=[
|
| 434 |
+
list(keyword_dict.keys()),
|
| 435 |
+
list(keyword_dict.values()),
|
| 436 |
+
],
|
| 437 |
+
line_color="darkslategray",
|
| 438 |
+
fill_color="#6DA9E4",
|
| 439 |
+
),
|
| 440 |
+
)
|
| 441 |
+
]
|
| 442 |
+
)
|
| 443 |
st.plotly_chart(fig, use_container_width=True)
|
| 444 |
with jobDescriptionCol:
|
| 445 |
with st.expander("Keywords & Values"):
|
| 446 |
+
df2 = pd.DataFrame(
|
| 447 |
+
selected_jd["keyterms"], columns=["keyword", "value"]
|
| 448 |
+
)
|
| 449 |
|
| 450 |
# Create the dictionary
|
| 451 |
keyword_dict = {}
|
| 452 |
+
for keyword, value in selected_jd["keyterms"]:
|
| 453 |
keyword_dict[keyword] = value * 100
|
| 454 |
|
| 455 |
+
fig = go.Figure(
|
| 456 |
+
data=[
|
| 457 |
+
go.Table(
|
| 458 |
+
header=dict(
|
| 459 |
+
values=["Keyword", "Value"],
|
| 460 |
+
font=dict(size=12, color="white"),
|
| 461 |
+
fill_color="#1d2078",
|
| 462 |
+
),
|
| 463 |
+
cells=dict(
|
| 464 |
+
values=[
|
| 465 |
+
list(keyword_dict.keys()),
|
| 466 |
+
list(keyword_dict.values()),
|
| 467 |
+
],
|
| 468 |
+
line_color="darkslategray",
|
| 469 |
+
fill_color="#6DA9E4",
|
| 470 |
+
),
|
| 471 |
+
)
|
| 472 |
+
]
|
| 473 |
+
)
|
| 474 |
st.plotly_chart(fig, use_container_width=True)
|
| 475 |
|
| 476 |
+
# Treemaps
|
| 477 |
with st.container():
|
| 478 |
resumeCol, jobDescriptionCol = st.columns(2)
|
| 479 |
with resumeCol:
|
| 480 |
with st.expander("Key Topics"):
|
| 481 |
+
fig = px.treemap(
|
| 482 |
+
df1,
|
| 483 |
+
path=["keyword"],
|
| 484 |
+
values="value",
|
| 485 |
+
color_continuous_scale="Rainbow",
|
| 486 |
+
title="Key Terms/Topics Extracted from your Resume",
|
| 487 |
+
)
|
| 488 |
st.plotly_chart(fig, use_container_width=True)
|
| 489 |
|
| 490 |
with jobDescriptionCol:
|
| 491 |
with st.expander("Key Topics"):
|
| 492 |
+
fig = px.treemap(
|
| 493 |
+
df2,
|
| 494 |
+
path=["keyword"],
|
| 495 |
+
values="value",
|
| 496 |
+
color_continuous_scale="Rainbow",
|
| 497 |
+
title="Key Terms/Topics Extracted from Job Description",
|
| 498 |
+
)
|
| 499 |
st.plotly_chart(fig, use_container_width=True)
|
| 500 |
|
| 501 |
avs.add_vertical_space(2)
|
| 502 |
+
st.markdown("#### Similarity Score")
|
| 503 |
+
print("Config file parsed successfully:")
|
| 504 |
+
resume_string = " ".join(selected_file["extracted_keywords"])
|
| 505 |
+
jd_string = " ".join(selected_jd["extracted_keywords"])
|
| 506 |
+
result = get_score(resume_string, jd_string)
|
| 507 |
+
similarity_score = round(result[0].score * 100, 2)
|
| 508 |
+
|
| 509 |
+
# Default color to green
|
| 510 |
+
score_color = "green"
|
| 511 |
+
if similarity_score < 60:
|
| 512 |
+
score_color = "red"
|
| 513 |
+
elif 60 <= similarity_score < 75:
|
| 514 |
+
score_color = "orange"
|
| 515 |
+
|
| 516 |
+
st.markdown(
|
| 517 |
+
f"Similarity Score obtained for the resume and job description is "
|
| 518 |
+
f'<span style="color:{score_color};font-size:24px; font-weight:Bold">{similarity_score}</span>',
|
| 519 |
+
unsafe_allow_html=True,
|
| 520 |
+
)
|
|
|
|
|
|
|
|
|
|
| 521 |
|
| 522 |
avs.add_vertical_space(2)
|
| 523 |
with st.expander("Common words between Resume and Job Description:"):
|
| 524 |
+
annotated_text(
|
| 525 |
+
create_annotated_text(
|
| 526 |
+
selected_file["clean_data"],
|
| 527 |
+
selected_jd["extracted_keywords"],
|
| 528 |
+
"JD",
|
| 529 |
+
"#F24C3D",
|
| 530 |
+
)
|
| 531 |
+
)
|
| 532 |
|
| 533 |
st.divider()
|
| 534 |
|
| 535 |
# Go back to top
|
| 536 |
+
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")
|
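The upload flow in streamlit_interactive.py keys everything off entries in st.session_state through a small update_session_state helper whose definition sits earlier in the script and is not part of this diff. A minimal sketch of what such a helper plausibly looks like; the real implementation may differ:

import streamlit as st


def update_session_state(key: str, value) -> None:
    # Persist a value across Streamlit reruns so an uploaded file is not re-processed.
    st.session_state[key] = value


# Used above, for example:
# update_session_state("resumeUploaded", "Uploaded")
# update_session_state("resumePath", save_path_resume)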
streamlit_second.py
CHANGED
|
@@ -1,20 +1,26 @@
@@ -32,7 +38,7 @@ def create_star_graph(nodes_and_weights, title):
@@ -46,8 +52,13 @@ def create_star_graph(nodes_and_weights, title):
@@ -57,10 +68,26 @@ def create_star_graph(nodes_and_weights, title):
@@ -68,24 +95,32 @@ def create_star_graph(nodes_and_weights, title):
@@ -119,18 +154,26 @@ def tokenize_string(input_string):
@@ -138,16 +181,20 @@ avs.add_vertical_space(1)
@@ -156,92 +203,122 @@ st.write(selected_file["clean_data"])
@@ -252,69 +329,135 @@ st.markdown("## Vector Similarity Scores")
@@ -328,13 +471,13 @@ st.text("Bruce Wayne : Fullstack Developer (MERN)")
|
|
| 1 |
+
import json
|
| 2 |
from typing import List
|
| 3 |
+
|
| 4 |
+
import networkx as nx
|
| 5 |
+
import nltk
|
| 6 |
import pandas as pd
|
|
|
|
| 7 |
import plotly.express as px
|
| 8 |
import plotly.graph_objects as go
|
| 9 |
+
import streamlit as st
|
|
|
|
| 10 |
from annotated_text import annotated_text, parameters
|
| 11 |
+
from streamlit_extras import add_vertical_space as avs
|
| 12 |
from streamlit_extras.badges import badge
|
| 13 |
+
|
| 14 |
+
from scripts.utils import get_filenames_from_dir
|
| 15 |
|
| 16 |
# Set page configuration
|
| 17 |
+
st.set_page_config(
|
| 18 |
+
page_title="Resume Matcher",
|
| 19 |
+
page_icon="Assets/img/favicon.ico",
|
| 20 |
+
initial_sidebar_state="auto",
|
| 21 |
+
)
|
| 22 |
|
| 23 |
+
nltk.download("punkt")
|
| 24 |
|
| 25 |
parameters.SHOW_LABEL_SEPARATOR = False
|
| 26 |
parameters.BORDER_RADIUS = 3
|
|
|
|
| 38 |
# Add nodes and edges with weights to the graph
|
| 39 |
for node, weight in nodes_and_weights:
|
| 40 |
G.add_node(node)
|
| 41 |
+
G.add_edge(central_node, node, weight=weight * 100)
|
| 42 |
|
| 43 |
# Get position layout for nodes
|
| 44 |
pos = nx.spring_layout(G)
|
|
|
|
| 52 |
edge_x.extend([x0, x1, None])
|
| 53 |
edge_y.extend([y0, y1, None])
|
| 54 |
|
| 55 |
+
edge_trace = go.Scatter(
|
| 56 |
+
x=edge_x,
|
| 57 |
+
y=edge_y,
|
| 58 |
+
line=dict(width=0.5, color="#888"),
|
| 59 |
+
hoverinfo="none",
|
| 60 |
+
mode="lines",
|
| 61 |
+
)
|
| 62 |
|
| 63 |
# Create node trace
|
| 64 |
node_x = []
|
|
|
|
| 68 |
node_x.append(x)
|
| 69 |
node_y.append(y)
|
| 70 |
|
| 71 |
+
node_trace = go.Scatter(
|
| 72 |
+
x=node_x,
|
| 73 |
+
y=node_y,
|
| 74 |
+
mode="markers",
|
| 75 |
+
hoverinfo="text",
|
| 76 |
+
marker=dict(
|
| 77 |
+
showscale=True,
|
| 78 |
+
colorscale="Rainbow",
|
| 79 |
+
reversescale=True,
|
| 80 |
+
color=[],
|
| 81 |
+
size=10,
|
| 82 |
+
colorbar=dict(
|
| 83 |
+
thickness=15,
|
| 84 |
+
title="Node Connections",
|
| 85 |
+
xanchor="left",
|
| 86 |
+
titleside="right",
|
| 87 |
+
),
|
| 88 |
+
line_width=2,
|
| 89 |
+
),
|
| 90 |
+
)
|
| 91 |
|
| 92 |
# Color node points by number of connections
|
| 93 |
node_adjacencies = []
|
|
|
|
| 95 |
for node in G.nodes():
|
| 96 |
adjacencies = list(G.adj[node]) # changes here
|
| 97 |
node_adjacencies.append(len(adjacencies))
|
| 98 |
+
node_text.append(f"{node}<br># of connections: {len(adjacencies)}")
|
| 99 |
|
| 100 |
node_trace.marker.color = node_adjacencies
|
| 101 |
node_trace.text = node_text
|
| 102 |
|
| 103 |
# Create the figure
|
| 104 |
+
fig = go.Figure(
|
| 105 |
+
data=[edge_trace, node_trace],
|
| 106 |
+
layout=go.Layout(
|
| 107 |
+
title=title,
|
| 108 |
+
titlefont_size=16,
|
| 109 |
+
showlegend=False,
|
| 110 |
+
hovermode="closest",
|
| 111 |
+
margin=dict(b=20, l=5, r=5, t=40),
|
| 112 |
+
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
| 113 |
+
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
|
| 114 |
+
),
|
| 115 |
+
)
|
| 116 |
|
| 117 |
# Show the figure
|
| 118 |
st.plotly_chart(fig)
|
| 119 |
|
| 120 |
|
| 121 |
+
def create_annotated_text(
|
| 122 |
+
input_string: str, word_list: List[str], annotation: str, color_code: str
|
| 123 |
+
):
|
| 124 |
# Tokenize the input string
|
| 125 |
tokens = nltk.word_tokenize(input_string)
|
| 126 |
|
|
|
|
| 154 |
|
| 155 |
|
| 156 |
# Display the main title and subheaders
|
| 157 |
+
st.title(":blue[Resume Matcher]")
|
| 158 |
with st.sidebar:
|
| 159 |
+
st.image("Assets/img/header_image.png")
|
| 160 |
+
st.subheader(
|
| 161 |
+
"Free and Open Source ATS to help your resume pass the screening stage."
|
| 162 |
+
)
|
| 163 |
+
st.markdown(
|
| 164 |
+
"Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)"
|
| 165 |
+
)
|
| 166 |
+
|
| 167 |
+
st.markdown(
|
| 168 |
+
"Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)"
|
| 169 |
+
)
|
| 170 |
|
| 171 |
badge(type="github", name="srbhr/Resume-Matcher")
|
| 172 |
+
st.markdown("For updates follow me on Twitter.")
|
| 173 |
badge(type="twitter", name="_srbhr_")
|
| 174 |
+
st.markdown(
|
| 175 |
+
"If you like the project and would like to further help in development please consider 👇"
|
| 176 |
+
)
|
| 177 |
badge(type="buymeacoffee", name="srbhr")
|
| 178 |
|
| 179 |
st.divider()
|
|
|
|
| 181 |
|
| 182 |
resume_names = get_filenames_from_dir("Data/Processed/Resumes")
|
| 183 |
|
| 184 |
+
output = st.selectbox(
|
| 185 |
+
f"There are {len(resume_names)} resumes present. Please select one from the menu below:",
|
| 186 |
+
resume_names,
|
| 187 |
+
)
|
| 188 |
|
| 189 |
avs.add_vertical_space(5)
|
| 190 |
|
| 191 |
+
selected_file = read_json("Data/Processed/Resumes/" + output)
|
| 192 |
|
| 193 |
avs.add_vertical_space(2)
|
| 194 |
st.markdown("#### Parsed Resume Data")
|
| 195 |
st.caption(
|
| 196 |
+
"This text is parsed from your resume. This is how it'll look like after getting parsed by an ATS."
|
| 197 |
+
)
|
| 198 |
st.caption("Utilize this to understand how to make your resume ATS friendly.")
|
| 199 |
avs.add_vertical_space(3)
|
| 200 |
# st.json(selected_file)
|
|
|
|
| 203 |
avs.add_vertical_space(3)
|
| 204 |
st.write("Now let's take a look at the extracted keywords from the resume.")
|
| 205 |
|
| 206 |
+
annotated_text(
|
| 207 |
+
create_annotated_text(
|
| 208 |
+
selected_file["clean_data"],
|
| 209 |
+
selected_file["extracted_keywords"],
|
| 210 |
+
"KW",
|
| 211 |
+
"#0B666A",
|
| 212 |
+
)
|
| 213 |
+
)
|
| 214 |
|
| 215 |
avs.add_vertical_space(5)
|
| 216 |
st.write("Now let's take a look at the extracted entities from the resume.")
|
| 217 |
|
| 218 |
# Call the function with your data
|
| 219 |
+
create_star_graph(selected_file["keyterms"], "Entities from Resume")
|
| 220 |
|
| 221 |
+
df2 = pd.DataFrame(selected_file["keyterms"], columns=["keyword", "value"])
|
| 222 |
|
| 223 |
# Create the dictionary
|
| 224 |
keyword_dict = {}
|
| 225 |
+
for keyword, value in selected_file["keyterms"]:
|
| 226 |
+
keyword_dict[keyword] = value * 100
|
| 227 |
+
|
| 228 |
+
fig = go.Figure(
|
| 229 |
+
data=[
|
| 230 |
+
go.Table(
|
| 231 |
+
header=dict(
|
| 232 |
+
values=["Keyword", "Value"], font=dict(size=12), fill_color="#070A52"
|
| 233 |
+
),
|
| 234 |
+
cells=dict(
|
| 235 |
+
values=[list(keyword_dict.keys()), list(keyword_dict.values())],
|
| 236 |
+
line_color="darkslategray",
|
| 237 |
+
fill_color="#6DA9E4",
|
| 238 |
+
),
|
| 239 |
+
)
|
| 240 |
+
]
|
| 241 |
+
)
|
| 242 |
st.plotly_chart(fig)
|
| 243 |
|
| 244 |
st.divider()
|
| 245 |
|
| 246 |
+
fig = px.treemap(
|
| 247 |
+
df2,
|
| 248 |
+
path=["keyword"],
|
| 249 |
+
values="value",
|
| 250 |
+
color_continuous_scale="Rainbow",
|
| 251 |
+
title="Key Terms/Topics Extracted from your Resume",
|
| 252 |
+
)
|
| 253 |
st.write(fig)
|
| 254 |
|
| 255 |
avs.add_vertical_space(5)
|
| 256 |
|
| 257 |
job_descriptions = get_filenames_from_dir("Data/Processed/JobDescription")
|
| 258 |
|
| 259 |
+
output = st.selectbox(
|
| 260 |
+
f"There are {len(job_descriptions)} job descriptions present. Please select one from the menu below:",
|
| 261 |
+
job_descriptions,
|
| 262 |
+
)
|
| 263 |
|
| 264 |
avs.add_vertical_space(5)
|
| 265 |
|
| 266 |
+
selected_jd = read_json("Data/Processed/JobDescription/" + output)
|
|
|
|
| 267 |
|
| 268 |
avs.add_vertical_space(2)
|
| 269 |
st.markdown("#### Job Description")
|
| 270 |
st.caption(
|
| 271 |
+
"Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
|
| 272 |
+
)
|
| 273 |
avs.add_vertical_space(3)
|
| 274 |
# st.json(selected_file)
|
| 275 |
st.write(selected_jd["clean_data"])
|
| 276 |
|
| 277 |
st.markdown("#### Common Words between Job Description and Resumes Highlighted.")
|
| 278 |
|
| 279 |
+
annotated_text(
|
| 280 |
+
create_annotated_text(
|
| 281 |
+
selected_file["clean_data"], selected_jd["extracted_keywords"], "JD", "#F24C3D"
|
| 282 |
+
)
|
| 283 |
+
)
|
| 284 |
|
| 285 |
st.write("Now let's take a look at the extracted entities from the job description.")
|
| 286 |
|
| 287 |
# Call the function with your data
|
| 288 |
+
create_star_graph(selected_jd["keyterms"], "Entities from Job Description")
|
| 289 |
|
| 290 |
+
df2 = pd.DataFrame(selected_jd["keyterms"], columns=["keyword", "value"])
|
| 291 |
|
| 292 |
# Create the dictionary
|
| 293 |
keyword_dict = {}
|
| 294 |
+
for keyword, value in selected_jd["keyterms"]:
|
| 295 |
+
keyword_dict[keyword] = value * 100
|
| 296 |
+
|
| 297 |
+
fig = go.Figure(
|
| 298 |
+
data=[
|
| 299 |
+
go.Table(
|
| 300 |
+
header=dict(
|
| 301 |
+
values=["Keyword", "Value"], font=dict(size=12), fill_color="#070A52"
|
| 302 |
+
),
|
| 303 |
+
cells=dict(
|
| 304 |
+
values=[list(keyword_dict.keys()), list(keyword_dict.values())],
|
| 305 |
+
line_color="darkslategray",
|
| 306 |
+
fill_color="#6DA9E4",
|
| 307 |
+
),
|
| 308 |
+
)
|
| 309 |
+
]
|
| 310 |
+
)
|
| 311 |
st.plotly_chart(fig)
|
| 312 |
|
| 313 |
st.divider()
|
| 314 |
|
| 315 |
+
fig = px.treemap(
|
| 316 |
+
df2,
|
| 317 |
+
path=["keyword"],
|
| 318 |
+
values="value",
|
| 319 |
+
color_continuous_scale="Rainbow",
|
| 320 |
+
title="Key Terms/Topics Extracted from the selected Job Description",
|
| 321 |
+
)
|
| 322 |
st.write(fig)
|
| 323 |
|
| 324 |
avs.add_vertical_space(5)
|
|
|
|
| 329 |
st.caption("Powered by Qdrant Vector Search")
|
| 330 |
st.info("These are pre-computed queries", icon="ℹ")
|
| 331 |
st.warning(
|
| 332 |
+
"Running Qdrant or Sentence Transformers without having capacity is not recommended",
|
| 333 |
+
icon="⚠",
|
| 334 |
+
)
|
| 335 |
|
| 336 |
|
| 337 |
# Your data
|
| 338 |
data = [
|
| 339 |
+
{
|
| 340 |
+
"text": "{'resume': 'Alfred Pennyworth",
|
| 341 |
+
"query": "Job Description Product Manager",
|
| 342 |
+
"score": 0.62658,
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"text": "{'resume': 'Barry Allen",
|
| 346 |
+
"query": "Job Description Product Manager",
|
| 347 |
+
"score": 0.43777737,
|
| 348 |
+
},
|
| 349 |
+
{
|
| 350 |
+
"text": "{'resume': 'Bruce Wayne ",
|
| 351 |
+
"query": "Job Description Product Manager",
|
| 352 |
+
"score": 0.39835533,
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"text": "{'resume': 'JOHN DOE",
|
| 356 |
+
"query": "Job Description Product Manager",
|
| 357 |
+
"score": 0.3915512,
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"text": "{'resume': 'Harvey Dent",
|
| 361 |
+
"query": "Job Description Product Manager",
|
| 362 |
+
"score": 0.3519544,
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"text": "{'resume': 'Barry Allen",
|
| 366 |
+
"query": "Job Description Senior Full Stack Engineer",
|
| 367 |
+
"score": 0.6541866,
|
| 368 |
+
},
|
| 369 |
+
{
|
| 370 |
+
"text": "{'resume': 'Alfred Pennyworth",
|
| 371 |
+
"query": "Job Description Senior Full Stack Engineer",
|
| 372 |
+
"score": 0.59806436,
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"text": "{'resume': 'JOHN DOE",
|
| 376 |
+
"query": "Job Description Senior Full Stack Engineer",
|
| 377 |
+
"score": 0.5951386,
|
| 378 |
+
},
|
| 379 |
+
{
|
| 380 |
+
"text": "{'resume': 'Bruce Wayne ",
|
| 381 |
+
"query": "Job Description Senior Full Stack Engineer",
|
| 382 |
+
"score": 0.57700855,
|
| 383 |
+
},
|
| 384 |
+
{
|
| 385 |
+
"text": "{'resume': 'Harvey Dent",
|
| 386 |
+
"query": "Job Description Senior Full Stack Engineer",
|
| 387 |
+
"score": 0.38489106,
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"text": "{'resume': 'Barry Allen",
|
| 391 |
+
"query": "Job Description Front End Engineer",
|
| 392 |
+
"score": 0.76813436,
|
| 393 |
+
},
|
| 394 |
+
{
|
| 395 |
+
"text": "{'resume': 'Bruce Wayne'",
|
| 396 |
+
"query": "Job Description Front End Engineer",
|
| 397 |
+
"score": 0.60440844,
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"text": "{'resume': 'JOHN DOE",
|
| 401 |
+
"query": "Job Description Front End Engineer",
|
| 402 |
+
"score": 0.56080043,
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"text": "{'resume': 'Alfred Pennyworth",
|
| 406 |
+
"query": "Job Description Front End Engineer",
|
| 407 |
+
"score": 0.5395049,
|
| 408 |
+
},
|
| 409 |
+
{
|
| 410 |
+
"text": "{'resume': 'Harvey Dent",
|
| 411 |
+
"query": "Job Description Front End Engineer",
|
| 412 |
+
"score": 0.3859515,
|
| 413 |
+
},
|
| 414 |
+
{
|
| 415 |
+
"text": "{'resume': 'JOHN DOE",
|
| 416 |
+
"query": "Job Description Java Developer",
|
| 417 |
+
"score": 0.5449441,
|
| 418 |
+
},
|
| 419 |
+
{
|
| 420 |
+
"text": "{'resume': 'Alfred Pennyworth",
|
| 421 |
+
"query": "Job Description Java Developer",
|
| 422 |
+
"score": 0.53476423,
|
| 423 |
+
},
|
| 424 |
+
{
|
| 425 |
+
"text": "{'resume': 'Barry Allen",
|
| 426 |
+
"query": "Job Description Java Developer",
|
| 427 |
+
"score": 0.5313871,
|
| 428 |
+
},
|
| 429 |
+
{
|
| 430 |
+
"text": "{'resume': 'Bruce Wayne ",
|
| 431 |
+
"query": "Job Description Java Developer",
|
| 432 |
+
"score": 0.44446343,
|
| 433 |
+
},
|
| 434 |
+
{
|
| 435 |
+
"text": "{'resume': 'Harvey Dent",
|
| 436 |
+
"query": "Job Description Java Developer",
|
| 437 |
+
"score": 0.3616274,
|
| 438 |
+
},
|
| 439 |
]
|
| 440 |
|
| 441 |
# Create a DataFrame
|
| 442 |
df = pd.DataFrame(data)
|
| 443 |
|
| 444 |
# Create different DataFrames based on the query and sort by score
|
| 445 |
+
df1 = df[df["query"] == "Job Description Product Manager"].sort_values(
|
| 446 |
+
by="score", ascending=False
|
| 447 |
+
)
|
| 448 |
+
df2 = df[df["query"] == "Job Description Senior Full Stack Engineer"].sort_values(
|
| 449 |
+
by="score", ascending=False
|
| 450 |
+
)
|
| 451 |
+
df3 = df[df["query"] == "Job Description Front End Engineer"].sort_values(
|
| 452 |
+
by="score", ascending=False
|
| 453 |
+
)
|
| 454 |
+
df4 = df[df["query"] == "Job Description Java Developer"].sort_values(
|
| 455 |
+
by="score", ascending=False
|
| 456 |
+
)
|
| 457 |
|
| 458 |
|
| 459 |
def plot_df(df, title):
|
| 460 |
+
fig = px.bar(df, x="text", y=df["score"] * 100, title=title)
|
| 461 |
st.plotly_chart(fig)
|
| 462 |
|
| 463 |
|
|
|
|
| 471 |
st.text("John Doe : Fullstack Developer (Java)")
|
| 472 |
|
| 473 |
|
| 474 |
+
plot_df(df1, "Job Description Product Manager 10+ Years of Exper")
|
| 475 |
+
plot_df(df2, "Job Description Senior Full Stack Engineer 5+ Year")
|
| 476 |
+
plot_df(df3, "Job Description Front End Engineer 2 Years of Expe")
|
| 477 |
+
plot_df(df4, "Job Description Java Developer 3 Years of Experien")
|
| 478 |
|
| 479 |
|
| 480 |
avs.add_vertical_space(3)
|
| 481 |
|
| 482 |
# Go back to top
|
| 483 |
+
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")
|
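The pre-computed scores in the data list above come from the project's Qdrant-powered vector search over resumes and job descriptions. Purely as an illustration of how such resume-versus-job-description numbers can be produced, here is a sketch using sentence-transformers cosine similarity; the model name and the whole approach are assumptions for this example, not the repository's actual pipeline:

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model, not necessarily the one used here


def cosine_score(resume_text: str, jd_text: str) -> float:
    # Encode both texts and return their cosine similarity, comparable in
    # spirit to the 'score' values listed above.
    embeddings = model.encode([resume_text, jd_text], convert_to_tensor=True)
    return float(util.cos_sim(embeddings[0], embeddings[1]))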