Spaces:
Runtime error
Runtime error
Upload 6 files
Browse files- src/config.py +223 -0
- src/logging_config.py +42 -0
- src/main.py +61 -0
- src/resume_parser.py +1274 -0
- src/reviewer.py +33 -0
- src/submitter.py +32 -0
src/config.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os

# Project layout: this module lives in src/, uploads are stored one level up
# under data/uploads. BASE_DIR is the process working directory at import time.
BASE_DIR = os.getcwd()
UPLOAD_FOLDER = os.path.join(BASE_DIR, '..', 'data', 'uploads')
ALLOWED_EXTENSIONS = {'pdf'}

# Regex patterns used to locate profile URLs inside resume text.
# Fixed: the original character class `[^\s<>"]` matched exactly ONE character
# after the domain, so the captured URL was truncated to a single path
# character; `+` now consumes the whole URL up to whitespace or markup.
linkedin_domain = r'https?://(www\.)?linkedin\.com/[^\s<>"]+'
github_domain = r'https?://(www\.)?github\.com/[^\s<>"]+'
kaggle_domain = r'https?://(www\.)?kaggle\.com/[^\s<>"]+'
medium_domain = r'https?://(www\.)?medium\.com/[^\s<>"]+'
hackerrank_domain = r'https?://(www\.)?hackerrank\.com/[^\s<>"]+'
leetcode_domain = r'https?://(www\.)?leetcode\.com/[^\s<>"]+'
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Section headings every resume is expected to contain; the parser flags
# whichever of these are missing.
required_sections = ['PROFILE SUMMARY','ACADEMIC PROFILE','TECHNICAL SKILLS','CERTIFICATIONS','PROJECTS','CAREER OBJECTIVE']

# Top-level contact fields extracted from every resume.
basic_informations = ["name", "contact_number", "email", "linkedin_urls", "github_urls"]

# Lower-cased vocabulary of data-science-related terms; resume text is scanned
# for these (presumably case-insensitively — confirm in the parser).
data_science_skills = ['queries', 'beautifulsoup', 'ms excel', 'mathematics', 'selenium',
                       'html', 'analytical skills', 'statsmodels','ai', 'improvement',
                       'analyze', 'metrics', 'forecasting', 'analytics', 'analytical',
                       'mysql', 'postgresql', 'database', 'writing', 'excel','regulations',
                       'algorithms', 'scipy', 'opencv', 'reports', 'eda', 'jupyter',
                       'presentations', 'modeling', 'audit', 'technical skills',
                       'schedule', 'nltk', 'iso', 'xgboost', 'segmentation', 'github',
                       'seaborn', 'keras', 'distribution', 'investigation', 'tableau',
                       'probability', 'analysis', 'r', 'technical', 'programming',
                       'web scraping', 'research', 'pandas', 'statistical analysis',
                       'numpy', 'predictive analysis', 'tensorflow', 'hypothesis',
                       'matplotlib', 'scikit-learn', 'information technology',
                       'machine learning', 'cloud', 'streamlit', 'mining', 'python',
                       'data analytics', 'deep learning', 'testing', 'training',
                       'clustering & classification', 'data analysis', 'engineering',
                       'data visualization', 'quantitative analysis', 'statistics',
                       'flask', 'statistical modeling', 'pytorch', 'data mining',
                       'aws', 'sql']
|
| 37 |
+
|
| 38 |
+
# Skills considered essential for the target role; matched against the resume
# (see keyword_variations for the accepted spellings of each).
essential_skills = ["Python", "SQL", "MySQL", "Tableau", "NumPy",
                    "Statsmodels", "CNN", "ANN",
                    "RNN", "Machine Learning", "Deep Learning", "SciKit Learn", "MS Excel",
                    "Data Visualization", "Power BI", "Data Analysis"]

# Maps a qualitative review label to a numeric score factor.
quality_mapping = {
    'Resume needs significant improvement': 0.15,
    'Resume needs improvement': 0.35,
    'Resume is average': 0.55,
    'Resume is good': 0.75,
    'Resume is very good': 0.90,
    'Resume is excellent': 1,
    # NOTE(review): 1.1 is outside the 0-1 range of every other label and ranks
    # "bad" above "excellent" — looks like a bug; confirm against the scoring code.
    'The resume is bad': 1.1
}
|
| 52 |
+
|
| 53 |
+
# Canonical skill name -> accepted spellings/variants found in resumes.
# A resume mentioning any variant is credited with the canonical skill.
# Variants padded with spaces/commas (e.g. " R ", ",AI ") are deliberate
# word-boundary guards for short tokens — do not strip them.
# Fixed: removed duplicate entries (" GAN " listed twice under "GAN",
# "problem-solving" listed twice under its own key).
keyword_variations = {
    "Python": ["Python", "Python_Language", "Python Programming"],
    "SQL": ["SQL", "SQL_Language", "Structured Query Language", "Structured_Query_Language"],
    "MySQL": ["MySQL", "MySQL_Database", "My_SQL", "My SQL"],
    "Pandas": ["Pandas", "Pandas_Library", "Pandas Data Analysis Library","Pandas_Data Analysis_Library"],
    "R": [" R ", "R_Programming", "R Language",",R "," R,", ",R,"],
    "Matplotlib": ["Matplotlib", "Matplotlib_Library", "Matplotlib Plotting Library","Matplotlib_Plotting_Library"],
    "Seaborn": ["Seaborn", "Seaborn_Library", "Seaborn Data Visualization Library"],
    "StatsModel": ["StatsModel", "StatsModel_Library", "StatsModel Statistical Library", "Statistical Modeling Library", "Statistics Modeling", "StatModelLib", "StatsMod", "SM Library", "SM"],
    "Tableau": ["Tableau", "Tableau_Software", "Tableau Data Visualization", "Tableau Analytics", "Tableau BI Tool", "Tableau Visualization Software", "Tableau Data Analysis", "Tableau BI","TableauBI"],
    "TensorFlow": ["TensorFlow", "TensorFlow_Library"],
    "NumPy": ["NumPy", "NumPy_Library", "Numerical Computing Library"],
    "PyTorch": ["PyTorch", "PyTorch_Library"],
    "Keras": ["Keras", "Keras_Library"],
    "Plotly": ["Plotly", "Plotly_Library"],
    "RFM": ["RFM", "RFM_Analysis", "Recency Frequency Monetary Analysis"],
    "ANOVA": ["ANOVA", "ANOVA_Test", "Analysis of Variance","Analysis_of_Variance"],
    "BeautifulSoup": ["BeautifulSoup", "BeautifulSoup_Library"],
    "Imputation": ["Imputation", "Data_Imputation","Data Imputation", "Missing Data Imputation"],
    "Scrappy": ["Scrappy", "Scrappy_Library"],
    "Selenium": ["Selenium", "Selenium_Library", "Selenium WebDriver", "Selenium Automation"],
    "TensorBoard": ["TensorBoard", "TensorBoard_Library", "TensorBoard Visualization Tool"],
    "SciPy": ["SciPy", "SciPy_Library", "Scientific Computing Library"],
    "OpenCV": ["OpenCV", "OpenCV_Library", "Computer Vision Library"],
    "NLTK": ["NLTK", "NLTK_Library", "Natural Language Toolkit"],
    "Hadoop": ["Hadoop", "Hadoop_Framework"],
    "Spark": ["Spark", "Spark_Framework", "Apache_Spark"],
    "spacy": ["spacy","Spacy_Library"],
    "AdaBoost": ["AdaBoost","Ada_Boost","Ada Boost", "AdaBoost_Algorithm", "Adaptive Boosting","Adaptive_Boosting"],
    "XGBoost": ["XGBoost","XG_Boost","XG Boost", "XGBoost_Algorithm", "Extreme Gradient Boosting"],
    "CNN": [" CNN ", "CNN,", ",CNN", "Convolutional Neural Network", "ConvNet", "CNN Algorithm","CNN"],
    "ANN": [" ANN ", "ANN,", ",ANN", "Artificial Neural Network", "ANN Algorithm","ANN"],
    "RNN": [" RNN ", "RNN,", ",RNN", "Recurrent Neural Network", "RNN Algorithm","RNN"],
    "KNN": [" kNN ", "kNN,", ",kNN","K-Nearest Neighbours", "K_Nearest_Neighbours", "K-Nearest-Neighbours", "K Nearest Neighbours", "KNN"],
    "LSTM": ["LSTM", "Long Short-Term Memory", "LSTM Network", "LSTM Algorithm"],
    "GAN": [" GAN ", "GAN,", ",GAN", "Generative Adversarial Network", "GAN Algorithm"],
    "YOLO": ["YOLO", "You Only Look Once", "YOLO_Algorithm"],
    "Clustering": ["Clustering", "Clustering_Algorithms", "Data_Clustering"],
    "Classification": ["Classification", "Classification_Algorithms", "Data_Classification"],
    "Word2Vec": ["Word2Vec", "Word2Vec_Algorithm", "Word2Vec Word Embeddings","word2vector"],
    "Tf-idf": ["Tf-idf","Tf_idf","Tf idf", "Term Frequency-Inverse Document Frequency", "Tf_idf_Algorithm","Tf-idf_Algorithm"],
    "Tokenization": ["Tokenization", "Text_Tokenization", "Word_Tokenization"],
    "Machine Learning": ["Machine Learning", "Machine_Learning", "Machine Learning Algorithms", "Machine_Learning_Algorithms", "ML"],
    "Deep Learning": ["Deep Learning", "Deep_Learning", "Deep Learning Algorithms", "Deep_Learning_Algorithms", "DL"],
    "SciKit Learn": ["SciKit Learn", "SciKit_Learn", "Sci Kit Learn", "SciKit-Learn","Sci_Kit_Learn", "sklearn","sk_learn"],
    "Hugging Face": ["Hugging Face", "Hugging_Face", "HuggingFace"],
    "MS Excel": ["Excel", "MS Excel","MSExcel", "MS_Excel", "Microsoft_Excel", "Microsoft Excel", "advance_excel","advance_MS_excel","advance_MSexcel", "advance excel", "Advance_Microsoft_excel", "Advance Microsoft excel"],
    "Data Visualization": ["Data Visualization", "Data_Visualization", "Data_Viz", "Visualization"],
    "Power BI": ["Power BI", "Power_BI", "Microsoft_Power_BI", "Microsoft Power BI","PowerBI"],
    "Transfer Learning": ["Transfer Learning", "Transfer_Learning"],
    "Linear Regression": ["Linear Regression", "Linear_Regression"],
    "Logistic Regression": ["Logistic Regression", "Logistic_Regression"],
    "Decision Tree": ["Decision Tree", "Decision_Tree"],
    "Random Forest": ["Random Forest", "Random_Forest"],
    "K-Means Clustering": ["K-Means Clustering", "K_Means_Clustering", "K-Means-Clustering", "K Means Clustering", "K-means", "k_means","K-mean", "k_mean"],
    "T-test": ["T-test", "T_Test", "T Test"],
    "Z-test": ["Z-test", "Z_Test", "Z Test"],
    "Hypothesis Testing": ["Hypothesis Testing", "Hypothesis_Testing"],
    "Chi-square": ["Chi-square", "Chi_Square", "Chi2"],
    "Normal Distribution": ["Normal Distribution", "Normal_Distribution"],
    "Correlation Analysis": ["Correlation Analysis", "Correlation_Analysis"],
    "Feature Scaling": ["Feature Scaling", "Feature_Scaling"],
    "Dimensionality Reduction": ["Dimensionality Reduction", "Dimensionality_Reduction"],
    "Jupyter Notebook": ["Jupyter Notebook", "Jupyter_Notebook"],
    "Google Colab": ["Google Colab", "Google_Colab"],
    "Data Analysis": ["Data Analysis", "Data_Analysis"],
    "Big Data": ["Big Data", "Big_Data"],
    "Support Vector Machines (SVM)": ["Support Vector Machines (SVM)", "Support_Vector_Machines", "SVM", "Support Vector Machines", "Support_Vector_Machines_SVM"],
    "Natural Language Processing": ["Natural Language Processing", "Natural_Language_Processing", "NLP"],
    "Artificial Intelligence": ["Artificial Intelligence", "Artificial_Intelligence"," AI ",",AI "," AI,","AI"],
    "Naive Bayes": ["Naive Bayes", "Naive_Bayes"],
    "Principal Component Analysis (PCA)": ["Principal Component Analysis (PCA)", "Principal_Component_Analysis", "Principal Component Analysis", "PCA"],
    "Descriptive Statistics": ["Descriptive Statistics", "Descriptive_Statistics"],
    "Inferential Statistics": ["Inferential Statistics", "Inferential_Statistics"],
    "Gradient Boosting Machines (GBM)": ["Gradient Boosting Machines (GBM)", "Gradient_Boosting_Machines", "Gradient Boosting Machines", "GBM","Gradient Boosting","Gradient_Boosting"],
    "Association Rule Learning (Apriori)": ["Association Rule Learning (Apriori)", "Association_Rule_Learning", "Association Rule Learning", "Apriori"],
    "Hierarchical Clustering": ["Hierarchical Clustering", "Hierarchical_Clustering"],
    "Image Segmentation": ["Image Segmentation", "Image_Segmentation"],
    "Object Detection": ["Object Detection", "Object_Detection"],
    "Encoder Decoder": ["Encoder - Decoder", "Encoder_Decoder","Encoder Decoder","Encoder Decode",
                        "Sequence-to-Sequence Models", "Seq2Seq Models", "Language Encoding", "Language Decoding", "Text Encoding", "Text Decoding",
                        "Image Encoding", "Image Decoding", "Audio Encoding", "Audio Decoding", "Video Encoding", "Video Decoding", "Speech Encoding", "Speech Decoding", "Data Compression",
                        "Data Encryption", "Data Decryption","Encoder","Decoder"],
    "Word Embedding": ["Word Embedding", "Word_Embedding"],
    "Bag of Words": ["Bag of Words", "Bag_of_Words"],
    "Sentiment Analysis": ["Sentiment Analysis", "Sentiment_Analysis"],
    "Predictive Analysis": ["Predictive Analysis", "Predictive_Analysis"],
    "Statistical Modeling": ["Statistical Modeling", "Statistical_Modeling","Statistical_Analysis","Statistical Analysis"],
    "Data Preprocessing": ["Data Preprocessing", "Data_Preprocessing"],
    "Model Development": ["Model Development", "Model_Development"],
    "Time Series Analysis": ["Time Series Analysis", "Time_Series_Analysis","TimeSeries","TimeSeries_Analysis"],
    "Statistics Fundamentals": ["Statistics Fundamentals", "Statistics_Fundamentals"],
    "Advanced ML": ["Advanced ML", "Advanced_ML", "Advanced Machine Learning", "Advanced_Machine_Learning", "Advanced-ML"],
    "Advanced DL": ["Advanced DL", "Advanced_DL", "Advanced Deep Learning", "Advanced_Deep_Learning", "Advanced-DL"],
    "EDA": ["EDA","Exploratory_Data_Analysis","Exploratory Data Analysis"],
    "Data Mining":["Data Mining","Data_Mining"],
    "Outlier Detection": ["Outlier_Detection","Outlier Detection"],
    "Missing Values Handling": ["Missing Values Handling","Missing_Values_Handling","Missing Values"],
    "Scaling Techniques": ["Scaling Techniques","Feature Scaling","Feature_Scaling","Data Scaling","Data_Scaling","Data Normalization","Data_Normalization","Standardization","Min-Max Scaling","Min-Max_Scaling","Normalization"],
    "R2 and Adjusted R2": ["R2 Score","R2_Score","Adjusted_R2_Score","Adjusted R2 Score","R Squared Score","R_Squared_Score","R2 Accuracy","R2_Accuracy","Adjusted R2 Accuracy","R2 Metric","Adjusted R2 Metric"],
    "Accuracy, Recall, F1 Score": ["Accuracy","Classification_Accuracy","Accuracy_Metrics","Recall","Precision","Recall_Score","F1 Score","F1-Score","F1_Metric","F1_Score","Classification_F1-Score"],
    "MS Office": ["MS_Office","MS Office","Microsoft Office","MS Word","MS_Word","Microsoft_Office","Microsoft_Word","Microsoft Word"],
    "Subquery": ["Subquery","Sub-query","Nested Query","Inner Query"],
    "SQL Join": ["SQL Join","Join in SQL","Join"],
    "Stemming": ["Stemming","Stemming Algorithm","Word Stemming","Stemming Techniques","Stemming in NLP","Text_Stemming","Text Stemming"],
    "Stopwords": ["Stopwords","Stop Words","Common Words","Text Stopwords","Stopwords Removal","Removing Stopwords","Stopwords List","Stopwords in NLP"],
    "docker_variations": ["Docker Integration","Docker", "Docker Automation", "Advanced Docker","Advanced_Docker", "Docker Tools"],
    "jenkins_variations": ["Jenkins","Jenkins CI/CD","CI/CD", "Jenkins Automation", "Jenkins Pipeline", "Jenkins Plugins"],
    "prometheus_variations": ["Prometheus Monitoring", "Prometheus Metrics", "PromQL", "Prometheus Alerting"],
    "cicd_variations": ["Continuous Integration", "Continuous Deployment", "CI/CD Automation", "CI/CD Tools"],
    "flask_variations": ["Flask","Flask Framework", "Flask RESTful", "Flask Deployment", "Flask Security"],
    "fastapi_variations": ["FastAPI","FastAPI Framework", "FastAPI RESTful","FastAPI_RESTful", "FastAPI Deployment", "FastAPI Tools","FastAPI_Tools"],
    "django_variations": ["Django Framework", "Django Web Development", "Django REST Framework", "Django Deployment","Django"],
    "aws_variations": ["Amazon Web Services", "AWS Cloud", "AWS Services", "AWS Management","AWS","AWS_Cloud"],
    "statistics_variations": ["Statistical Analysis", "Descriptive Statistics", "Inferential Statistics", "Probability Theory"],
    "hypothesis_testing_variations": ["Null Hypothesis", "Alternative Hypothesis", "Significance Level", "Type I Error"],
    "smote_variations": ["Synthetic Minority Over-sampling Technique", "SMOTE Algorithm", "SMOTE Python", "SMOTE Applications","SMOTE"],
    "mlflow_variations": ["MLflow Framework", "MLflow Tracking", "MLflow Deployment", "MLflow Integration","MLflow"],
    "packaging_variations": ["Software Packaging", "Package Management", "Python Packaging", "Packaging Best Practices"],
    "version_control_variations": ["Git Version Control", "Git Commands", "Git Workflow", "Git Collaboration","Git"],
    "communication skills": ["communication_skills", "communication_skill", "communication skills"],
    "problem-solving": ["problem-solving", "problem_solving"],
    "decision making": ["decision making", "decision-making", "decision_making"]
}
|
| 178 |
+
|
| 179 |
+
# Sections whose raw text is pulled out for detailed (e.g. grammar) review.
Extract_sections = ["CAREER OBJECTIVE", "PROFILE SUMMARY"]

# All heading strings recognised as section boundaries when splitting a resume
# into sections.
section_headers = [
    "CAREER OBJECTIVE", "PROFILE SUMMARY", "WORK EXPERIENCE", "EDUCATION","ADDITIONAL INFORMATION AND HOBBIES",
    "ACADEMIC PROFILE", "PROJECTS", "CERTIFICATIONS","SKILLS",
    "PERSONAL SKILLS", "PERSONAL INFORMATION", "REFERENCES",
    "EXTRACURRICULAR ACTIVITIES", "TECHNICAL SKILLS", "KEY SKILLS",
    "ADDITIONAL INFORMATION", "CERTIFICATIONS & ACADEMIC ENDEAVOURS",
    "AWARDS & ACCOLADES", "SOFTWARE SKILLS", "AWARDS"
]
|
| 189 |
+
|
| 190 |
+
# Over-used "tutorial" project titles; a resume listing these is presumably
# penalised or nudged toward more original work — confirm in the reviewer.
common_projects = ["Titanic","Iris","MNIST", "COVID-19", "Bank Churn",
                   "Spam","Handwritten Digit","Heart Disease","House Price",
                   "Diabetes","Twitter", "Churn",
                   "Wine Quality", "Loan","Titanic Survival Prediction",
                   "Iris Flower Classification",
                   "House Price Prediction",
                   "MNIST Handwritten Digit Recognition",
                   "Customer Churn Prediction",
                   "Sentiment Analysis of Movie Reviews",
                   "Spam Email Detection",
                   "Fake News Detection",
                   "Image Classification with CNNs",
                   "Stock Price Prediction"]

# Alternative project ideas offered to candidates in the review output.
suggested_projects = ["Predicting Patient Readmissions in Hospitals",
                      "Optimizing Ad Spend with Machine Learning Models","Developing a Fake News Detection System",
                      "Developing an AI Chatbot for Customer Service Automation","Personalized Health Recommendations Using Wearable Data"]

# Specify rule IDs and error keywords to ignore
# (used to filter LanguageTool matches during the grammar check)
ignore_rule_ids = ['WHITESPACE_RULE']
ignore_error_keywords = ['repeated a whitespace']

# Blogs & Articles recommended in the review output.
blog_articles = ["https://www.dataquest.io/blog/how-data-science-resume-cv/",
                 "https://medium.com/data-science-at-microsoft/writing-a-resume-for-a-data-science-role-345b98bdf80b",
                 "https://medium.com/@alicechen.ai/resume-201-how-to-write-an-effective-data-science-resume-441cbe6c0932"
                 ]

# Links to video guidance recommended in the review output.
youtube_links = ["https://youtu.be/Tt08KmFfIYQ?si=EdebdWUfbttysrfL",
                 "https://youtu.be/R3abknwWX7k?si=m4EyviXgKDoPgIGr",
                 "https://youtu.be/1-z9ptlBar4?si=lA7WgU4j4MFGjBZV",
                 "https://youtu.be/pjqi_M3SPwY?si=5aRizcfpreKR9xUr",
                 "https://youtu.be/ROfceyeD7f4?si=OTbrL7BUKSW1u2mt"]
|
src/logging_config.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# src/logging_config.py
import logging
from logging.handlers import TimedRotatingFileHandler
import os
from datetime import datetime


def setup_logging():
    """Configure the root logger with a console handler and a daily-rotating file handler.

    Safe to call more than once (e.g. on Flask auto-reload): existing handlers
    are removed before the new pair is attached, so log lines are never
    duplicated.
    """
    # Create the logs directory (a sibling of src/) if it doesn't exist.
    log_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'logs')
    os.makedirs(log_dir, exist_ok=True)

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    # Fixed: the original added both handlers, then cleared ALL handlers and
    # added them again. Clearing first is sufficient and keeps exactly one
    # console handler and one file handler no matter how often this runs.
    logger.handlers.clear()

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # Console handler: everything from DEBUG up goes to stderr.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(formatter)

    # File handler: today's file, rotated at midnight; rotated files carry a
    # date-stamped suffix.
    today_date = datetime.now().strftime('%Y-%m-%d')
    log_file = os.path.join(log_dir, f'{today_date}.log')
    file_handler = TimedRotatingFileHandler(log_file, when='midnight', interval=1)
    file_handler.setLevel(logging.DEBUG)
    file_handler.suffix = '%Y-%m-%d'  # Date format for rotated log file names
    file_handler.setFormatter(formatter)

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)
|
src/main.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import json

from flask import Flask, jsonify, request, flash, redirect, url_for, render_template

from submitter import ResumeSubmitter
from reviewer import ResumeReviewer
from resume_parser import ResumeParser
from logging_config import setup_logging

# Templates live at the project root, one level above src/.
app = Flask(__name__, template_folder=os.path.join(os.path.dirname(__file__), '..', 'templates'))
# Security fix: read the session key from the environment; the hard-coded
# fallback keeps local development working but must not reach production.
app.secret_key = os.environ.get('SECRET_KEY', 'supersecretkey')
setup_logging()
|
| 12 |
+
|
| 13 |
+
@app.route('/v1/resumes/', methods=['POST', 'GET'])
def submit_resume():
    """Render the upload form on GET; on POST store the file and hand off to review."""
    if request.method != 'POST':
        return ResumeSubmitter().upload_form()

    result = ResumeSubmitter().upload_file()
    if not os.path.exists(result):
        # upload_file() returned an error description instead of a saved path.
        return jsonify(message=f"failed to submit resume, {result}"), 400

    resume_path = result
    try:
        # Hand the stored file over to the /v1/reviews/ endpoint.
        return redirect(url_for('get_reviews', path=resume_path))
    except Exception as e:
        app.logger.error("Failed to redirect to /v1/reviews/: %s", str(e))
        return jsonify(message="failed to redirect to reviews page"), 500
|
| 29 |
+
|
| 30 |
+
@app.route("/v1/reviews/<path:path>", methods=['POST', 'GET'])
def get_reviews(path):
    """Parse the resume stored at *path* and render the review template."""
    app.logger.debug("Inside get_reviews")
    parser = ResumeParser()
    reviewer = ResumeReviewer()  # instantiated as in the original flow
    parsed_response = parser.parse_text(path)

    # parse_text() is expected to return a Flask response whose .data holds a
    # JSON payload — verify that before trusting it.
    try:
        parsed_resume = json.loads(parsed_response.data)
    except json.JSONDecodeError:
        app.logger.error("Failed to decode JSON from the response")
        return "Invalid JSON response from parser", 500

    # Persist the parsed result next to the app (debugging aid; optional).
    with open('parsed_resume.json', 'w') as json_file:
        json.dump(parsed_resume, json_file)

    return render_template("review_output.html", parsed_resume=parsed_resume)
|
| 51 |
+
|
| 52 |
+
@app.route("/v1/users/<int:id>", methods=['GET'])
def get_user(id):
    """Placeholder user-lookup endpoint: echoes the requested id back as JSON."""
    return jsonify(message=f"user retrieved successfully! for given id {id}")
|
| 55 |
+
|
| 56 |
+
@app.route('/', methods=['GET'])
def greet():
    """Serve the landing page."""
    return render_template('home_page.html')
|
| 59 |
+
|
| 60 |
+
if __name__ == '__main__':
    # Development entry point; defaults to Flask's built-in server.
    app.run()
|
src/resume_parser.py
ADDED
|
@@ -0,0 +1,1274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# python file to parse different section from resume
import datetime
import logging
import random
import re
from collections import defaultdict  # fixed: was imported twice

import fitz
import language_tool_python
import requests
from flask import jsonify
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer, LTChar, LTTextLineHorizontal
from spacy.matcher import Matcher

# NOTE(review): main.py imports this module absolutely (`from resume_parser
# import ResumeParser`) while the config imports below are package-relative —
# confirm the app is started with src/ on the package path or these will fail.
from .config import data_science_skills, keyword_variations, essential_skills, quality_mapping, Extract_sections, suggested_projects, ignore_rule_ids
from .config import required_sections, linkedin_domain, github_domain, basic_informations, section_headers, common_projects, ignore_error_keywords, blog_articles, youtube_links
from .config import kaggle_domain, hackerrank_domain, leetcode_domain, medium_domain

# Shared grammar-checker instance: loading LanguageTool is expensive, so it is
# created once at import time and reused by all parser methods.
tool = language_tool_python.LanguageTool('en-US')
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ResumeParser:
|
| 19 |
+
|
| 20 |
+
def extract_contact_number_from_resume(self, text):
|
| 21 |
+
contact_number = None
|
| 22 |
+
suggestion = ""
|
| 23 |
+
|
| 24 |
+
# Use regex pattern to find a potential contact number
|
| 25 |
+
pattern = r"\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b"
|
| 26 |
+
match = re.search(pattern, text)
|
| 27 |
+
if match:
|
| 28 |
+
contact_number = match.group()
|
| 29 |
+
# Check if the contact number is of the correct length
|
| 30 |
+
digits_only = re.sub(r'\D', '', contact_number)
|
| 31 |
+
if len(digits_only) == 10:
|
| 32 |
+
suggestion = ""
|
| 33 |
+
elif len(digits_only) > 10 and digits_only.startswith('91') and len(digits_only[2:]) == 10:
|
| 34 |
+
suggestion = ""
|
| 35 |
+
else:
|
| 36 |
+
suggestion = "Contact number should have exactly 10 digits."
|
| 37 |
+
|
| 38 |
+
return contact_number, suggestion
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def extract_hyperlinks(self, pdf_path):
|
| 43 |
+
doc = fitz.open(pdf_path)
|
| 44 |
+
links = []
|
| 45 |
+
|
| 46 |
+
for page_num in range(len(doc)):
|
| 47 |
+
page = doc.load_page(page_num)
|
| 48 |
+
link_list = page.get_links()
|
| 49 |
+
for link in link_list:
|
| 50 |
+
uri = link.get('uri', None)
|
| 51 |
+
if uri:
|
| 52 |
+
links.append(uri)
|
| 53 |
+
|
| 54 |
+
return links
|
| 55 |
+
|
| 56 |
+
    def extract_text_from_pdf(self, pdf_path):
        """Return the full plain text of the PDF via pdfminer's extract_text."""
        return extract_text(pdf_path)
|
| 58 |
+
|
| 59 |
+
def extract_email_from_text(self, text):
|
| 60 |
+
pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
|
| 61 |
+
match = re.search(pattern, text)
|
| 62 |
+
if match:
|
| 63 |
+
return match.group()
|
| 64 |
+
return None
|
| 65 |
+
|
| 66 |
+
def extract_email_from_resume(self, pdf_path):
|
| 67 |
+
text = self.extract_text_from_pdf(pdf_path)
|
| 68 |
+
email = self.extract_email_from_text(text)
|
| 69 |
+
suggestion = ""
|
| 70 |
+
|
| 71 |
+
# If no email found in text, check hyperlinks
|
| 72 |
+
if not email:
|
| 73 |
+
links = self.extract_hyperlinks(pdf_path)
|
| 74 |
+
for link in links:
|
| 75 |
+
if link.startswith('mailto:'):
|
| 76 |
+
email_candidate = link.split('mailto:')[1]
|
| 77 |
+
if self.is_valid_email(email_candidate):
|
| 78 |
+
email = email_candidate
|
| 79 |
+
break
|
| 80 |
+
|
| 81 |
+
# Additional validation for email found in text or links
|
| 82 |
+
if email and not self.is_valid_email(email):
|
| 83 |
+
suggestion += "Your email address doesn't seem to be valid. Please check and correct."
|
| 84 |
+
|
| 85 |
+
return email, suggestion
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def is_valid_email(self, email):
|
| 89 |
+
# Length check
|
| 90 |
+
if len(email) > 254:
|
| 91 |
+
return False
|
| 92 |
+
|
| 93 |
+
# Consecutive special characters check
|
| 94 |
+
if re.search(r"[._%+-]{2,}", email):
|
| 95 |
+
return False
|
| 96 |
+
|
| 97 |
+
# Domain part validation
|
| 98 |
+
domain_part = email.split('@')[1]
|
| 99 |
+
if not re.match(r"[A-Za-z0-9.-]+\.[A-Za-z]{2,}", domain_part):
|
| 100 |
+
return False
|
| 101 |
+
|
| 102 |
+
# Standard email format check
|
| 103 |
+
pattern = r"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$"
|
| 104 |
+
return re.match(pattern, email) is not None
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def extract_sections_from_resume(self, text):
|
| 108 |
+
missing_sections = []
|
| 109 |
+
sections_not_capitalized = []
|
| 110 |
+
|
| 111 |
+
for section in required_sections:
|
| 112 |
+
pattern = r"\b{}\b".format(re.escape(section))
|
| 113 |
+
|
| 114 |
+
match_obj = re.search(pattern, text, re.IGNORECASE)
|
| 115 |
+
if not match_obj:
|
| 116 |
+
missing_sections.append(section)
|
| 117 |
+
else:
|
| 118 |
+
if match_obj.group() not in map(str.upper, required_sections):
|
| 119 |
+
sections_not_capitalized.append(section)
|
| 120 |
+
|
| 121 |
+
return missing_sections, sections_not_capitalized
|
| 122 |
+
|
| 123 |
+
def extract_skills_from_resume(self, text):
|
| 124 |
+
if not isinstance(text, str):
|
| 125 |
+
raise ValueError(f"Expected 'text' to be a string, but got {type(text)}")
|
| 126 |
+
|
| 127 |
+
skills = []
|
| 128 |
+
for skill in essential_skills:
|
| 129 |
+
pattern = r"\b{}\b".format(re.escape(skill))
|
| 130 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
| 131 |
+
if match:
|
| 132 |
+
skills.append(skill)
|
| 133 |
+
return skills
|
| 134 |
+
|
| 135 |
+
def extract_keyword_variations_from_resume(self, text):
|
| 136 |
+
found_keywords = []
|
| 137 |
+
for keyword, variations in keyword_variations.items():
|
| 138 |
+
for variation in variations:
|
| 139 |
+
if variation.lower() in text.lower():
|
| 140 |
+
found_keywords.append(variation)
|
| 141 |
+
break
|
| 142 |
+
|
| 143 |
+
return found_keywords
|
| 144 |
+
|
| 145 |
+
def extract_keyword_variations_from_formatted_text(self, formatted_text):
|
| 146 |
+
found_keyword_section = []
|
| 147 |
+
for keyword, variations in keyword_variations.items():
|
| 148 |
+
for variation in variations:
|
| 149 |
+
if variation.lower() in formatted_text.lower():
|
| 150 |
+
found_keyword_section.append(variation)
|
| 151 |
+
break
|
| 152 |
+
|
| 153 |
+
return found_keyword_section
|
| 154 |
+
|
| 155 |
+
def extract_linkedIn_urls_from_pdf(self, pdf_path):
|
| 156 |
+
linkedin_urls = None
|
| 157 |
+
pdf_document = fitz.open(pdf_path)
|
| 158 |
+
for page_num in range(len(pdf_document)):
|
| 159 |
+
page = pdf_document.load_page(page_num)
|
| 160 |
+
links = page.get_links()
|
| 161 |
+
for link in links:
|
| 162 |
+
url = link.get('uri', '')
|
| 163 |
+
if re.search(linkedin_domain, url):
|
| 164 |
+
linkedin_urls = url
|
| 165 |
+
pdf_document.close()
|
| 166 |
+
return linkedin_urls
|
| 167 |
+
|
| 168 |
+
    def extract_github_urls_from_pdf(self, pdf_path):
        """Return the last GitHub *profile* URL found in the PDF's links, or None.

        A link is treated as a profile (rather than a repository) when, after
        stripping the domain prefix, the remainder contains no further '/'.
        """
        github_urls = None
        pdf_document = fitz.open(pdf_path)
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            links = page.get_links()
            for link in links:
                url = link.get('uri', '')
                if re.search(github_domain, url):
                    # NOTE(review): github_domain ends with a one-character
                    # class, so re.sub also removes the first character after
                    # the domain — confirm profile detection still works.
                    path = re.sub(github_domain, '', url)
                    parts = path.split('/')
                    if len(parts) == 1:
                        github_urls = url
        pdf_document.close()
        return github_urls
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def extract_extra_urls_pdf(self,pdf_path, domains):
|
| 186 |
+
extracted_urls = defaultdict(set)
|
| 187 |
+
try:
|
| 188 |
+
# Open the PDF document
|
| 189 |
+
pdf_document = fitz.open(pdf_path)
|
| 190 |
+
|
| 191 |
+
# Iterate through all pages in the PDF
|
| 192 |
+
for page_num in range(len(pdf_document)):
|
| 193 |
+
page = pdf_document.load_page(page_num)
|
| 194 |
+
links = page.get_links()
|
| 195 |
+
|
| 196 |
+
for link in links:
|
| 197 |
+
url = link.get('uri', '')
|
| 198 |
+
if url: # Ensure there's a URL
|
| 199 |
+
for domain in domains:
|
| 200 |
+
if re.search(domain, url, re.IGNORECASE):
|
| 201 |
+
extracted_urls[domain].add(url) # Add URL to the domain's set
|
| 202 |
+
except Exception as e:
|
| 203 |
+
print(f"Error processing PDF: {e}")
|
| 204 |
+
finally:
|
| 205 |
+
pdf_document.close()
|
| 206 |
+
|
| 207 |
+
return {domain: list(urls) for domain, urls in extracted_urls.items()}
|
| 208 |
+
|
| 209 |
+
def is_valid_url(self , github_urls ):
|
| 210 |
+
suggest = ""
|
| 211 |
+
for _ in [github_urls]:
|
| 212 |
+
if not github_urls:
|
| 213 |
+
break
|
| 214 |
+
|
| 215 |
+
try:
|
| 216 |
+
response = requests.head(github_urls)
|
| 217 |
+
if response.status_code != 200:
|
| 218 |
+
suggest = "GitHub URL is not valid, please check and correct. "
|
| 219 |
+
except requests.RequestException:
|
| 220 |
+
suggest = "GitHub URL is not valid, please check and correct. "
|
| 221 |
+
|
| 222 |
+
return suggest
|
| 223 |
+
return suggest
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def is_valid_name(self, name):
|
| 227 |
+
if any(char.isdigit() for char in name):
|
| 228 |
+
return False
|
| 229 |
+
if len(name.split()) > 3:
|
| 230 |
+
return False
|
| 231 |
+
common_non_names = {"Email", "Github", "LinkedIn", "Portfolio", "Data Analyst"}
|
| 232 |
+
if name in common_non_names:
|
| 233 |
+
return False
|
| 234 |
+
return True
|
| 235 |
+
|
| 236 |
+
    def extract_name(self, resume_text):
        """Pick the first plausible name line out of the resume text.

        Returns:
            (name, suggestion) on success, otherwise (None, "No valid name found").
        """
        lines = resume_text.split('\n')

        # Lines shaped like "<word> <word>" are name candidates.
        # NOTE(review): the '*' quantifiers also admit mostly-empty matches,
        # so this filter is very permissive — confirm intended.
        name_lines = [line for line in lines if re.match(r'^[A-Za-z]*\s[A-Za-z]*', line.strip())]

        names = []
        for i in range(len(name_lines)):
            if self.is_valid_name(name_lines[i].strip()):
                names.append(name_lines[i].strip())

        if len(names) >= 1:
            # The first surviving candidate is assumed to be the name.
            name = names[0]
            suggestion = ""
            # Flag names whose parts do not start with a capital letter.
            name_parts = name.split()
            if any(part[0].islower() for part in name_parts):
                suggestion += " name should start with a capital letter. "
            return name, suggestion

        return None, "No valid name found"
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def check_missing_sections(self, resume_data):
|
| 261 |
+
missing_information = []
|
| 262 |
+
for section in basic_informations:
|
| 263 |
+
if not resume_data.get(section):
|
| 264 |
+
missing_information.append(section)
|
| 265 |
+
return missing_information
|
| 266 |
+
|
| 267 |
+
def segregate_sections(self, text):
|
| 268 |
+
header_pattern = re.compile(rf'^\s*({"|".join(re.escape(header) for header in section_headers)}):?\s*$', re.IGNORECASE)
|
| 269 |
+
sections_text = {}
|
| 270 |
+
current_section = None
|
| 271 |
+
lines = text.splitlines()
|
| 272 |
+
for line in lines:
|
| 273 |
+
clean_line = line.strip()
|
| 274 |
+
match = header_pattern.match(clean_line)
|
| 275 |
+
if match:
|
| 276 |
+
current_section = match.group(1).upper()
|
| 277 |
+
sections_text[current_section] = []
|
| 278 |
+
elif current_section:
|
| 279 |
+
sections_text[current_section].append(line.strip())
|
| 280 |
+
|
| 281 |
+
return sections_text
|
| 282 |
+
|
| 283 |
+
def extract_and_format_sections(self, sections_text, Extract_sections):
|
| 284 |
+
formatted_text = ""
|
| 285 |
+
for section in Extract_sections:
|
| 286 |
+
if section in sections_text:
|
| 287 |
+
section_content = " ".join(sections_text[section]).replace('\n', ' ')
|
| 288 |
+
formatted_text += f"{section}:\n{section_content}\n\n"
|
| 289 |
+
return formatted_text
|
| 290 |
+
|
| 291 |
+
def replace_keywords_with_placeholders(self, formatted_text, found_keyword_section):
|
| 292 |
+
placeholder_text = formatted_text
|
| 293 |
+
keyword_placeholders = {}
|
| 294 |
+
|
| 295 |
+
# Use a set to avoid duplicates and keep track of keyword placeholders
|
| 296 |
+
used_keywords = set()
|
| 297 |
+
for i, keyword in enumerate(found_keyword_section):
|
| 298 |
+
if keyword not in used_keywords:
|
| 299 |
+
used_keywords.add(keyword)
|
| 300 |
+
placeholder = f"{{KEYWORD_{i}}}"
|
| 301 |
+
keyword_placeholders[placeholder] = keyword
|
| 302 |
+
# Using word boundary to match whole words
|
| 303 |
+
placeholder_text = re.sub(r'\b' + re.escape(keyword) + r'\b', placeholder, placeholder_text, flags=re.IGNORECASE)
|
| 304 |
+
|
| 305 |
+
return placeholder_text, keyword_placeholders
|
| 306 |
+
|
| 307 |
+
def replace_placeholders_with_keywords(self, grammar_issues, keyword_placeholders):
|
| 308 |
+
updated_issues = []
|
| 309 |
+
for issue in grammar_issues:
|
| 310 |
+
context = issue['context']
|
| 311 |
+
for placeholder, keyword in keyword_placeholders.items():
|
| 312 |
+
context = context.replace(placeholder, keyword)
|
| 313 |
+
# Update the context in the issue dictionary
|
| 314 |
+
issue['context'] = context
|
| 315 |
+
updated_issues.append(issue)
|
| 316 |
+
return updated_issues
|
| 317 |
+
|
| 318 |
+
def grammar_check(self, placeholder_text):
|
| 319 |
+
matches = tool.check(placeholder_text)
|
| 320 |
+
grammar_issues = []
|
| 321 |
+
for match in matches:
|
| 322 |
+
issue = {
|
| 323 |
+
"context": match.context,
|
| 324 |
+
"error": match.message,
|
| 325 |
+
"rule_id": match.ruleId,
|
| 326 |
+
"suggested_correction": match.replacements
|
| 327 |
+
}
|
| 328 |
+
grammar_issues.append(issue)
|
| 329 |
+
return grammar_issues
|
| 330 |
+
|
| 331 |
+
def filter_grammar_issues(self, grammar_issues, ignore_rule_ids=None, ignore_error_keywords=None):
|
| 332 |
+
if ignore_rule_ids is None:
|
| 333 |
+
ignore_rule_ids = []
|
| 334 |
+
if ignore_error_keywords is None:
|
| 335 |
+
ignore_error_keywords = []
|
| 336 |
+
|
| 337 |
+
filtered_issues = []
|
| 338 |
+
for issue in grammar_issues:
|
| 339 |
+
if issue['rule_id'] not in ignore_rule_ids and not any(keyword in issue['error'] for keyword in ignore_error_keywords):
|
| 340 |
+
filtered_issues.append(issue)
|
| 341 |
+
|
| 342 |
+
return filtered_issues
|
| 343 |
+
|
| 344 |
+
def process_resume(self, text, found_keyword_section, Extract_sections):
|
| 345 |
+
sections_text = self.segregate_sections(text)
|
| 346 |
+
formatted_text = self.extract_and_format_sections(sections_text, Extract_sections)
|
| 347 |
+
found_keyword_section = self.extract_keyword_variations_from_formatted_text(formatted_text)
|
| 348 |
+
placeholder_text, keyword_placeholders = self.replace_keywords_with_placeholders(formatted_text, found_keyword_section)
|
| 349 |
+
grammar_issues = self.grammar_check(placeholder_text)
|
| 350 |
+
grammar_issues_text = self.replace_placeholders_with_keywords(grammar_issues, keyword_placeholders)
|
| 351 |
+
filtered_grammar_issues = self.filter_grammar_issues(grammar_issues, ignore_rule_ids, ignore_error_keywords)
|
| 352 |
+
return filtered_grammar_issues
|
| 353 |
+
|
| 354 |
+
def grammar_issue_check(self, text, found_keyword_section, Extract_sections):
|
| 355 |
+
issues = {}
|
| 356 |
+
text1 = " ".join(text.split("\n"))
|
| 357 |
+
for section in Extract_sections:
|
| 358 |
+
grammar_issues = self.process_resume(text, found_keyword_section, [section])
|
| 359 |
+
if not grammar_issues:
|
| 360 |
+
grammar_issues = "no error found"
|
| 361 |
+
issues[section] = grammar_issues
|
| 362 |
+
return issues
|
| 363 |
+
|
| 364 |
+
def normalize_font_name(self,font_name):
|
| 365 |
+
if '-' in font_name:
|
| 366 |
+
font_name = font_name.split('-')[0]
|
| 367 |
+
if '+' in font_name:
|
| 368 |
+
font_name = font_name.split('+')[1]
|
| 369 |
+
return font_name
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
    def extract_text_properties(self, pdf_path, predefined_terms):
        """Walk the PDF character-by-character and collect styled text runs.

        A new run starts whenever the font size, font name or page changes,
        or a whitespace/special character is hit.  Runs whose text is a
        substring of any entry in *predefined_terms* are skipped.

        Returns:
            list[dict]: {text, font_size, font_name, page_num} per run.
        """
        text_properties = []
        current_phrase = ""
        current_font_size = None
        current_font_name = None
        current_page_num = None

        # Characters that terminate a phrase (bullets, punctuation, ...).
        special_characters = set("●▪•!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")

        def add_current_phrase():
            # Flush the accumulated run unless it is (part of) a known term.
            nonlocal current_phrase
            if current_phrase.strip():
                flag = any(current_phrase in term for term in predefined_terms)
                if not flag:
                    text_properties.append({
                        "text": current_phrase,
                        "font_size": current_font_size,
                        "font_name": current_font_name,
                        "page_num": current_page_num
                    })
            current_phrase = ""

        for page_layout in extract_pages(pdf_path):
            for element in page_layout:
                if isinstance(element, LTTextContainer):
                    for text_line in element:
                        if isinstance(text_line, LTTextLineHorizontal):
                            for character in text_line:
                                if isinstance(character, LTChar):
                                    text = character.get_text()
                                    font_size = round(character.size, 2)
                                    font_name = self.normalize_font_name(character.fontname)
                                    page_num = page_layout.pageid

                                    # Whitespace / punctuation ends the run.
                                    if text.isspace() or text in special_characters:
                                        add_current_phrase()
                                        continue

                                    # A style or page change also ends the run.
                                    if (font_size != current_font_size or font_name != current_font_name or
                                        page_num != current_page_num):
                                        add_current_phrase()
                                        current_font_size = font_size
                                        current_font_name = font_name
                                        current_page_num = page_num

                                    current_phrase += text

        # Flush whatever remains after the last character.
        add_current_phrase()

        return text_properties
|
| 422 |
+
|
| 423 |
+
def group_similar_fonts(self,text_properties, tolerance=0.5):
|
| 424 |
+
grouped_properties = defaultdict(list)
|
| 425 |
+
|
| 426 |
+
for prop in text_properties:
|
| 427 |
+
rounded_size = round(prop["font_size"] / tolerance) * tolerance
|
| 428 |
+
key = (prop["font_name"], rounded_size)
|
| 429 |
+
grouped_properties[key].append(prop)
|
| 430 |
+
|
| 431 |
+
return grouped_properties
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
def identify_different_fonts_and_sizes(self, grouped_properties):
|
| 437 |
+
most_common_group = max(grouped_properties.values(), key=len)
|
| 438 |
+
most_common_key = None
|
| 439 |
+
for key, group in grouped_properties.items():
|
| 440 |
+
if group == most_common_group:
|
| 441 |
+
most_common_key = key
|
| 442 |
+
break
|
| 443 |
+
|
| 444 |
+
different_texts = []
|
| 445 |
+
|
| 446 |
+
for key, group in grouped_properties.items():
|
| 447 |
+
if group != most_common_group:
|
| 448 |
+
for prop in group:
|
| 449 |
+
reason = []
|
| 450 |
+
if key[1] != most_common_key[1]:
|
| 451 |
+
reason.append(f"size not {most_common_key[1]}")
|
| 452 |
+
if key[0] != most_common_key[0]:
|
| 453 |
+
reason.append(f"font not {most_common_key[0]}")
|
| 454 |
+
different_texts.append({
|
| 455 |
+
"page_num": prop['page_num'],
|
| 456 |
+
"text": prop['text'],
|
| 457 |
+
"found_size": prop['font_size'],
|
| 458 |
+
"found_font_name": prop['font_name'],
|
| 459 |
+
"reason": ", ".join(reason)
|
| 460 |
+
})
|
| 461 |
+
|
| 462 |
+
return different_texts
|
| 463 |
+
|
| 464 |
+
    def parse_dates(self, sections_text, section_name):
        """Pull date strings (and two-date ranges) from one section's lines.

        Args:
            sections_text: {SECTION: [lines]} mapping from segregate_sections.
            section_name: Key to scan; must be present (KeyError otherwise).

        Returns:
            list[str]: lowercase date strings; a line with exactly two dates
            is joined into a single "start end" range string.
        """
        # NOTE(review): unused; kept for compatibility.
        suggest = ""

        # Matches MM/YYYY, "Month YYYY", "Month DD, YYYY", bare YYYY,
        # "Month/YYYY" and "MonthYYYY - MonthYYYY" range forms.
        date_pattern = (
            r'\b\d{1,2}/\d{4}\b|'  # MM/YYYY
            r'\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+\d{4}\b|'  # Month YYYY
            r'\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+\d{1,2},?\s*\d{4}\b|'  # Month DD, YYYY
            r'\b\d{4}\b|'  # YYYY
            r'\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)[a-z]*/?\d{4}\b|'  # Month/YYYY
            r'\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)[a-z]*\d{4}\s*-\s*(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)[a-z]*\d{4}\b'  # Month/YYYY - Month/YYYY
        )

        all_dates = []

        # Collect dates per line.  NOTE(review): lines with a single date are
        # silently dropped (len(matches) > 1 required) — confirm intended.
        for entry in sections_text[section_name]:
            entry = entry.lower()
            matches = re.findall(date_pattern, entry)
            if matches and len(matches)>1:
                if len(matches) == 2:
                    # Exactly two dates on a line → treat as a start/end pair.
                    all_dates.append(f"{matches[0]} {matches[1]}")
                else:
                    all_dates.extend(matches)

        return all_dates
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
    def convert_to_date(self, date_str):
        """Parse a raw date string (single date or range) into date objects.

        Returns:
            (start_date, end_date) datetime.date pair on success, or [] when
            the string does not contain one or two date-like fragments.
            NOTE(review): the empty-list failure return is inconsistent with
            the tuple success return; callers that unpack will raise on it.
        """
        # Mapping of month names and abbreviations to their numeric equivalents
        month_map = {
            'jan': 1, 'january': 1, 'feb': 2, 'february': 2,
            'mar': 3, 'march': 3, 'apr': 4, 'april': 4,
            'may': 5, 'jun': 6, 'june': 6, 'jul': 7,
            'july': 7, 'aug': 8, 'august': 8, 'sep': 9,
            'september': 9, 'oct': 10, 'october': 10,
            'nov': 11, 'november': 11, 'dec': 12, 'december': 12,
            '01': 1, '02': 2, '03': 3, '04': 4,
            '05': 5, '06': 6, '07': 7, '08': 8,
            '09': 9, '10': 10, '11': 11, '12': 12
        }

        # Regex patterns to match different date formats
        pattern_mm_yyyy = re.compile(r'(\d{1,2})/(\d{4})')
        pattern_mm_yyyy_space = re.compile(r'(\d{1,2})\s(\d{4})')
        pattern_month_yyyy = re.compile(r'([a-zA-Z]+)\s?(\d{4})')
        pattern_yyyy = re.compile(r'(\d{4})')

        def extract_date(date_str):
            # Try the formats from most to least specific.
            match_mm_yyyy = pattern_mm_yyyy.match(date_str)
            match_mm_yyyy_space = pattern_mm_yyyy_space.match(date_str)
            match_month_yyyy = pattern_month_yyyy.match(date_str)
            match_yyyy = pattern_yyyy.match(date_str)

            if match_mm_yyyy:
                month = int(match_mm_yyyy.group(1))
                year = int(match_mm_yyyy.group(2))
            elif match_mm_yyyy_space:
                month = int(match_mm_yyyy_space.group(1))
                year = int(match_mm_yyyy_space.group(2))
            elif match_month_yyyy:
                # NOTE(review): month_map.get may return None for unknown
                # month words, making datetime.date raise below — confirm.
                month = month_map.get(match_month_yyyy.group(1).lower())
                year = int(match_month_yyyy.group(2))
            elif match_yyyy:
                # Bare year: normalise to January.
                month = 1
                year = int(match_yyyy.group(1))
            else:
                return []

            # Day-of-month is always normalised to the 1st.
            return datetime.date(year, month, 1)

        # Split the incoming string into one or two date-looking fragments.
        date_parts = re.findall(r'(\d{4}\s[a-zA-Z]+\s?|\d{4}[a-zA-Z]+|\d{4}\/\d{2}|\d{4}\s\d{2}|[a-zA-Z]+\s?\d{4}|\d{4}\s[a-zA-Z]+)', date_str)
        if len(date_parts) == 1:
            # Standalone year or single date
            start_date = extract_date(date_parts[0])
            end_date = datetime.date(start_date.year, start_date.month, start_date.day)
        elif len(date_parts) == 2:
            # Date range
            start_date = extract_date(date_parts[0])
            end_date = extract_date(date_parts[1])
        else:
            return []

        return start_date, end_date
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
def date_time(self, date_parts):
|
| 552 |
+
converted_dates = []
|
| 553 |
+
for date_part in date_parts:
|
| 554 |
+
start_date, end_date = self.convert_to_date(date_part)
|
| 555 |
+
converted_dates.append((start_date, end_date))
|
| 556 |
+
return converted_dates
|
| 557 |
+
|
| 558 |
+
|
| 559 |
+
def check_chronological_order(self, converted_dates, section_name ):
|
| 560 |
+
suggestion = ""
|
| 561 |
+
sorted_dates = sorted(converted_dates, key=lambda x: (x[1], x[0]), reverse=True)
|
| 562 |
+
if converted_dates == sorted_dates:
|
| 563 |
+
suggestion = f"{section_name} section is in chronological order."
|
| 564 |
+
else:
|
| 565 |
+
suggestion = f"{section_name} section is not in chronological order."
|
| 566 |
+
|
| 567 |
+
return suggestion
|
| 568 |
+
|
| 569 |
+
def check_common_projects(self, projects_text):
|
| 570 |
+
found_projects = []
|
| 571 |
+
for project in common_projects:
|
| 572 |
+
if project.lower() in projects_text.lower():
|
| 573 |
+
found_projects.append(project)
|
| 574 |
+
return found_projects
|
| 575 |
+
|
| 576 |
+
def recommend_resources():
|
| 577 |
+
# Randomly pick 2 blog articles and 2 YouTube links
|
| 578 |
+
recommended_blogs = random.sample(blog_articles, 2)
|
| 579 |
+
recommended_youtube = random.sample(youtube_links, 2)
|
| 580 |
+
|
| 581 |
+
# Return the recommendations
|
| 582 |
+
return {
|
| 583 |
+
"Recommended Blogs": recommended_blogs,
|
| 584 |
+
"Recommended YouTube Links": recommended_youtube
|
| 585 |
+
}
|
| 586 |
+
|
| 587 |
+
def check_imarticus_certifications(self, certifications_text):
|
| 588 |
+
# Check if "imarticus" is present in the certifications text
|
| 589 |
+
if "imarticus" in certifications_text.lower():
|
| 590 |
+
return {
|
| 591 |
+
"found": True,
|
| 592 |
+
"message": "Imarticus certification found. Please upload it in the academic section."
|
| 593 |
+
}
|
| 594 |
+
return {
|
| 595 |
+
"found": False,
|
| 596 |
+
"message": "No Imarticus certification found in the provided text."
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
    def chronological_order_check(self, sections_text, section_name):
        """Check date ordering for one section of the resume.

        Returns:
            (order_suggestion, suggestion): the first describes the
            chronological ordering; the second explains why the check could
            not run (section missing, or no parsable dates).
        """
        order_suggestion = ""
        suggestion = ""
        # Section buckets are keyed by upper-cased header names.
        section_name = section_name.upper()
        if section_name in sections_text:
            date = self.parse_dates(sections_text, section_name)
            if date:
                converted_dates = self.date_time(date)
                order_suggestion = self.check_chronological_order(converted_dates, section_name)
            else:
                suggestion = f"No valid dates found in {section_name} section. "
        else:
            suggestion = f"{section_name} is not in section header. "

        return order_suggestion, suggestion
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
|
| 618 |
+
# Function to check for spelling mistakes
|
| 619 |
+
def check_spelling(self, headers, section_headers):
|
| 620 |
+
suggestions = []
|
| 621 |
+
for header in headers:
|
| 622 |
+
if header.upper() not in map(str.upper, section_headers):
|
| 623 |
+
suggestions = header
|
| 624 |
+
return suggestions
|
| 625 |
+
|
| 626 |
+
    def is_present_name(name):
        """
        Checks if a given name has at least 2 words.

        Args:
            name: The name string to check.

        Returns:
            True if it has at least 2 words, false otherwise.
        """
        # NOTE(review): defined without ``self`` and shadowed by the later
        # ``is_present_name(self, name)`` definition in this class — this
        # version is dead code.
        parts = name.split()
        return len(parts) >= 2
|
| 638 |
+
|
| 639 |
+
    def is_sentence_case(name):
        """Return True when every word starts uppercase and continues lowercase."""
        # NOTE(review): defined without ``self`` and shadowed by the later
        # ``is_sentence_case(self, name)`` definition in this class — this
        # version is dead code.
        parts = name.split()  # Split into individual words
        for part in parts:
            if not part:  # handles empty strings in name
                continue
            if not part[0].isupper() or not part[1:].islower():
                return False  # Check if first letter is uppercase and rest are lowercase
        return True
|
| 648 |
+
|
| 649 |
+
def is_present_name(self,name):
|
| 650 |
+
parts = name.split()
|
| 651 |
+
return len(parts) >= 2
|
| 652 |
+
|
| 653 |
+
def is_sentence_case(self,name):
|
| 654 |
+
parts = name.split()
|
| 655 |
+
for part in parts:
|
| 656 |
+
if not part:
|
| 657 |
+
continue
|
| 658 |
+
if not part[0].isupper() or not part[1:].islower():
|
| 659 |
+
return False
|
| 660 |
+
return True
|
| 661 |
+
|
| 662 |
+
def extract_project_links(self,sections_text):
|
| 663 |
+
project_links = {}
|
| 664 |
+
|
| 665 |
+
if "PROJECTS" in sections_text:
|
| 666 |
+
project_list = sections_text.get("PROJECTS", [])
|
| 667 |
+
url_pattern = r"https?://[^\s]+"
|
| 668 |
+
for project in project_list:
|
| 669 |
+
links = re.findall(url_pattern,project)
|
| 670 |
+
if links:
|
| 671 |
+
project_links[project] = links
|
| 672 |
+
return project_links
|
| 673 |
+
|
| 674 |
+
def count_sentences(self,text):
|
| 675 |
+
sentence_endings = r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|!)\s"
|
| 676 |
+
sentences = re.split(sentence_endings, text)
|
| 677 |
+
sentences = [s.strip() for s in sentences if s.strip()]
|
| 678 |
+
return len(sentences)
|
| 679 |
+
|
| 680 |
+
def calculate_summary_score(self,summary):
|
| 681 |
+
if not summary:
|
| 682 |
+
score+=0
|
| 683 |
+
|
| 684 |
+
num_sentences = self.count_sentences(summary)
|
| 685 |
+
if num_sentences<=4:
|
| 686 |
+
return 3
|
| 687 |
+
elif num_sentences>4:
|
| 688 |
+
return 1
|
| 689 |
+
else:
|
| 690 |
+
return 0
|
| 691 |
+
|
| 692 |
+
def calculate_extra_urls_bonus(self,pdf_path):
|
| 693 |
+
domains = [
|
| 694 |
+
r"hackerrank\.com", # Hackerrank
|
| 695 |
+
r"leetcode\.com", # LeetCode
|
| 696 |
+
r"medium\.com" # Medium
|
| 697 |
+
]
|
| 698 |
+
extra_urls = self.extract_extra_urls_pdf(pdf_path, domains)
|
| 699 |
+
has_extra_urls = any(urls for urls in extra_urls.values())
|
| 700 |
+
return 5 if has_extra_urls else 0
|
| 701 |
+
|
| 702 |
+
def calculate_relevant_experience_score(self, experience_text):
|
| 703 |
+
"""
|
| 704 |
+
Assigns a score based on the presence of relevant experience keywords.
|
| 705 |
+
|
| 706 |
+
Args:
|
| 707 |
+
experience_text (str): The extracted work experience section text.
|
| 708 |
+
|
| 709 |
+
Returns:
|
| 710 |
+
int: A score of 5 if relevant keywords are found, otherwise 0.
|
| 711 |
+
"""
|
| 712 |
+
if not experience_text:
|
| 713 |
+
return 0 # ✅ No experience section → Score 0
|
| 714 |
+
|
| 715 |
+
if isinstance(experience_text, list):
|
| 716 |
+
experience_text = " ".join(experience_text) # ✅ Convert list to a single string
|
| 717 |
+
|
| 718 |
+
experience_text = experience_text.strip().lower() # ✅ Ensure it's a string and lowercase
|
| 719 |
+
|
| 720 |
+
# ✅ Check if any keyword from 'data_science_skills' or 'essential_skills' exists
|
| 721 |
+
for skill in data_science_skills + essential_skills:
|
| 722 |
+
if skill.lower() in experience_text:
|
| 723 |
+
return 5 # ✅ Found relevant experience → Full score
|
| 724 |
+
|
| 725 |
+
return 0
|
| 726 |
+
|
| 727 |
+
def calculate_ds_skills_score(self, skills_present):
|
| 728 |
+
if not skills_present: # No skills found at all
|
| 729 |
+
return 0
|
| 730 |
+
|
| 731 |
+
# Use skills from config instead of hardcoded list
|
| 732 |
+
ds_skills_list_lower = [skill.lower() for skill in data_science_skills]
|
| 733 |
+
skills_present_lower = [skill.lower() for skill in skills_present]
|
| 734 |
+
|
| 735 |
+
matching_count = sum(1 for skill in skills_present_lower
|
| 736 |
+
if skill in ds_skills_list_lower)
|
| 737 |
+
|
| 738 |
+
if matching_count == 0: # Skills found but none match DS list
|
| 739 |
+
return 2
|
| 740 |
+
elif 1 <= matching_count <= 5:
|
| 741 |
+
return 3
|
| 742 |
+
elif matching_count > 5:
|
| 743 |
+
return 5
|
| 744 |
+
return 0
|
| 745 |
+
|
| 746 |
+
def calculate_project_link_score(self, projects_with_links):
|
| 747 |
+
"""
|
| 748 |
+
Assigns a score based on whether project links are present.
|
| 749 |
+
|
| 750 |
+
Args:
|
| 751 |
+
projects_with_links (int): The number of projects with links.
|
| 752 |
+
|
| 753 |
+
Returns:
|
| 754 |
+
int: 2 if project links are found, otherwise 0.
|
| 755 |
+
"""
|
| 756 |
+
return 2 if projects_with_links > 0 else 0
|
| 757 |
+
|
| 758 |
+
|
| 759 |
+
def imarticus_review_score(self,name,contact_number,email,linkedin_urls,github_url,missing_sections,sections_not_capitalized,common_projects,section_order_suggestion,sections_text,skills,relevant_experience_score):
|
| 760 |
+
score = 0
|
| 761 |
+
if name:
|
| 762 |
+
name_parts = name.split()
|
| 763 |
+
num_parts = len(name_parts)
|
| 764 |
+
|
| 765 |
+
if num_parts == 0:
|
| 766 |
+
score += 0
|
| 767 |
+
if self.is_sentence_case(name):
|
| 768 |
+
score += 3
|
| 769 |
+
elif self.is_present_name(name):
|
| 770 |
+
score += 1.5
|
| 771 |
+
|
| 772 |
+
if contact_number and isinstance(contact_number, str):
|
| 773 |
+
digits_only = re.sub(r'\D', '', contact_number)
|
| 774 |
+
|
| 775 |
+
if digits_only.startswith("91") and len(digits_only) > 10:
|
| 776 |
+
digits_only = digits_only[2:] # Remove the first two characters ('91')
|
| 777 |
+
|
| 778 |
+
if len(digits_only) == 10 and digits_only[0] in "6789": # Check for valid Indian mobile numbers
|
| 779 |
+
score += 3
|
| 780 |
+
|
| 781 |
+
if email:
|
| 782 |
+
score += 3 if self.is_valid_email(email) else 0
|
| 783 |
+
|
| 784 |
+
score += 3 if linkedin_urls else 0
|
| 785 |
+
|
| 786 |
+
if github_url:
|
| 787 |
+
github_suggestion = self.is_valid_url(github_url)
|
| 788 |
+
score += 3 if not github_suggestion else 0
|
| 789 |
+
else:
|
| 790 |
+
score += 0
|
| 791 |
+
|
| 792 |
+
if len(missing_sections)==0 and len(sections_not_capitalized)==0:
|
| 793 |
+
score+=10
|
| 794 |
+
elif len(missing_sections)==0 and len(sections_not_capitalized)>0:
|
| 795 |
+
score+=8
|
| 796 |
+
elif len(missing_sections)<=3:
|
| 797 |
+
score+=6
|
| 798 |
+
elif len(missing_sections)>4:
|
| 799 |
+
score+=3
|
| 800 |
+
|
| 801 |
+
if common_projects:
|
| 802 |
+
score +=0
|
| 803 |
+
else:
|
| 804 |
+
score +=5
|
| 805 |
+
|
| 806 |
+
if section_order_suggestion:
|
| 807 |
+
score -= 2
|
| 808 |
+
else:
|
| 809 |
+
score
|
| 810 |
+
|
| 811 |
+
"""
|
| 812 |
+
ds_skills_list_lower = [skill.lower() for skill in data_science_skills]
|
| 813 |
+
skills_present_lower = [skill.lower() for skill in self.extract_skills_from_resume(skills) ]
|
| 814 |
+
|
| 815 |
+
matching_skill_count = 0
|
| 816 |
+
for skill in skills_present_lower:
|
| 817 |
+
if ds_skills_list_lower:
|
| 818 |
+
matching_skill_count+=1
|
| 819 |
+
if matching_skill_count==0:
|
| 820 |
+
score+=0
|
| 821 |
+
|
| 822 |
+
if matching_skill_count<=5:
|
| 823 |
+
score+=2
|
| 824 |
+
elif matching_skill_count>=10 and matching_skill_count<=15:
|
| 825 |
+
score+5
|
| 826 |
+
else:
|
| 827 |
+
score+=8
|
| 828 |
+
"""
|
| 829 |
+
|
| 830 |
+
if "PROJECTS" not in sections_text:
|
| 831 |
+
score+=0
|
| 832 |
+
else:
|
| 833 |
+
project_list = sections_text.get("PROJECTS",[])
|
| 834 |
+
project_count = len([x for x in project_list if "Description" in x])
|
| 835 |
+
|
| 836 |
+
if project_count<=2:
|
| 837 |
+
score+=2
|
| 838 |
+
elif project_count>2 and project_count<=4:
|
| 839 |
+
score+=5
|
| 840 |
+
elif project_count>4:
|
| 841 |
+
score+=3
|
| 842 |
+
"""
|
| 843 |
+
project_links = self.extract_project_links(sections_text)
|
| 844 |
+
total_projects = len(sections_text.get("PROJECTS", []))
|
| 845 |
+
projects_with_links = len(project_links)
|
| 846 |
+
|
| 847 |
+
if total_projects > 0:
|
| 848 |
+
if projects_with_links == 0:
|
| 849 |
+
score+=0
|
| 850 |
+
elif projects_with_links / total_projects >= 0.5:
|
| 851 |
+
score += 1.5
|
| 852 |
+
if projects_with_links == total_projects:
|
| 853 |
+
score += 3
|
| 854 |
+
"""
|
| 855 |
+
resume_data = {}
|
| 856 |
+
# Extract projects & links
|
| 857 |
+
project_links = self.extract_project_links(sections_text)
|
| 858 |
+
projects_with_links = len(project_links)
|
| 859 |
+
|
| 860 |
+
# ✅ Count only projects with descriptions
|
| 861 |
+
valid_projects = [
|
| 862 |
+
p for p in sections_text.get("PROJECTS", []) if "description" in p.lower()
|
| 863 |
+
]
|
| 864 |
+
total_projects = len(valid_projects) # ✅ Count projects properly
|
| 865 |
+
|
| 866 |
+
# ✅ Calculate project link score
|
| 867 |
+
project_link_score = self.calculate_project_link_score(projects_with_links)
|
| 868 |
+
resume_data["project_link_score"] = project_link_score
|
| 869 |
+
|
| 870 |
+
# ✅ Prevent division by zero
|
| 871 |
+
if total_projects > 0:
|
| 872 |
+
if projects_with_links == 0:
|
| 873 |
+
score += 0
|
| 874 |
+
elif projects_with_links / total_projects >= 0.5:
|
| 875 |
+
score += 1.5
|
| 876 |
+
if projects_with_links == total_projects:
|
| 877 |
+
score += 3
|
| 878 |
+
else:
|
| 879 |
+
score += 0 # ✅ Ensure no division error if no projects exist
|
| 880 |
+
|
| 881 |
+
"""
|
| 882 |
+
profile_summary = sections_text.get("PROFILE SUMMARY", "")
|
| 883 |
+
print(profile_summary)
|
| 884 |
+
|
| 885 |
+
summary_score = self.calculate_summary_score(profile_summary)
|
| 886 |
+
score += summary_score
|
| 887 |
+
"""
|
| 888 |
+
ds_skills_score = self.calculate_ds_skills_score(skills)
|
| 889 |
+
score += ds_skills_score
|
| 890 |
+
|
| 891 |
+
|
| 892 |
+
certifications = sections_text.get("CERTIFICATIONS & ACADEMIC ENDEAVOURS", [])
|
| 893 |
+
num_certifications = len(certifications)
|
| 894 |
+
|
| 895 |
+
if num_certifications==0:
|
| 896 |
+
score+=0
|
| 897 |
+
elif 0 < num_certifications <= 2:
|
| 898 |
+
score+=3
|
| 899 |
+
elif 2 < num_certifications <= 4:
|
| 900 |
+
score+=5
|
| 901 |
+
elif num_certifications>4:
|
| 902 |
+
score+=7
|
| 903 |
+
|
| 904 |
+
"""
|
| 905 |
+
extra_urls_bonus = self.calculate_extra_urls_bonus(pdf_path)
|
| 906 |
+
score += extra_urls_bonus
|
| 907 |
+
"""
|
| 908 |
+
|
| 909 |
+
score += relevant_experience_score
|
| 910 |
+
|
| 911 |
+
score += project_link_score
|
| 912 |
+
|
| 913 |
+
return score
|
| 914 |
+
|
| 915 |
+
|
| 916 |
+
|
| 917 |
+
|
| 918 |
+
def imarticus_detailed_score(self, name, contact_number, email, linkedin_urls, github_url,
|
| 919 |
+
missing_sections=None, sections_not_capitalized=None, common_projects=None,
|
| 920 |
+
section_order_suggestion=None, sections_text=None, skills=None,
|
| 921 |
+
relevant_experience_score=0):
|
| 922 |
+
|
| 923 |
+
# Ensure lists and dictionaries have default values to avoid 'NoneType' errors
|
| 924 |
+
missing_sections = missing_sections or []
|
| 925 |
+
sections_not_capitalized = sections_not_capitalized or []
|
| 926 |
+
common_projects = common_projects or []
|
| 927 |
+
sections_text = sections_text or {}
|
| 928 |
+
|
| 929 |
+
score_breakdown = {
|
| 930 |
+
"name_score": 0,
|
| 931 |
+
"contact_number_score": 0,
|
| 932 |
+
"email_score": 0,
|
| 933 |
+
"linkedin_url_score": 0,
|
| 934 |
+
"github_url_score": 0,
|
| 935 |
+
"missing_sections_score": 0,
|
| 936 |
+
"common_projects_score": 0,
|
| 937 |
+
"section_order_score": 0,
|
| 938 |
+
"projects_score": 0,
|
| 939 |
+
"certifications_score": 0,
|
| 940 |
+
"relevant_experience_score": 0,
|
| 941 |
+
"ds_skills_score": 0,
|
| 942 |
+
"extra_urls_bonus": 0,
|
| 943 |
+
"summary_score": 0,
|
| 944 |
+
"project_link_score": 0
|
| 945 |
+
}
|
| 946 |
+
|
| 947 |
+
# ✅ Name Score (3 Points)
|
| 948 |
+
if name:
|
| 949 |
+
if self.is_sentence_case(name):
|
| 950 |
+
score_breakdown["name_score"] = 3
|
| 951 |
+
elif self.is_present_name(name):
|
| 952 |
+
score_breakdown["name_score"] = 1.5
|
| 953 |
+
|
| 954 |
+
|
| 955 |
+
# ✅ Contact Number Score (3 Points)
|
| 956 |
+
if contact_number and isinstance(contact_number, str):
|
| 957 |
+
digits_only = re.sub(r'\D', '', contact_number)
|
| 958 |
+
if digits_only.startswith("91") and len(digits_only) > 10:
|
| 959 |
+
digits_only = digits_only[2:]
|
| 960 |
+
if len(digits_only) == 10 and digits_only[0] in "6789":
|
| 961 |
+
score_breakdown["contact_number_score"] = 3
|
| 962 |
+
|
| 963 |
+
# ✅ Email Score (3 Points)
|
| 964 |
+
score_breakdown["email_score"] = 3 if email and self.is_valid_email(email) else 0
|
| 965 |
+
|
| 966 |
+
# ✅ LinkedIn URL Score (3 Points)
|
| 967 |
+
score_breakdown["linkedin_url_score"] = 3 if linkedin_urls else 0
|
| 968 |
+
|
| 969 |
+
# ✅ GitHub URL Score (3 Points)
|
| 970 |
+
if github_url and self.is_valid_url(github_url):
|
| 971 |
+
score_breakdown["github_url_score"] = 3
|
| 972 |
+
|
| 973 |
+
# ✅ Missing Sections Score (10 Points)
|
| 974 |
+
if not missing_sections and not sections_not_capitalized:
|
| 975 |
+
score_breakdown["missing_sections_score"] = 10
|
| 976 |
+
elif not missing_sections and sections_not_capitalized:
|
| 977 |
+
score_breakdown["missing_sections_score"] = 8
|
| 978 |
+
elif len(missing_sections) <= 3:
|
| 979 |
+
score_breakdown["missing_sections_score"] = 6
|
| 980 |
+
else:
|
| 981 |
+
score_breakdown["missing_sections_score"] = 3
|
| 982 |
+
|
| 983 |
+
# ✅ Common Projects Score (5 Points)
|
| 984 |
+
score_breakdown["common_projects_score"] = 0 if common_projects else 5
|
| 985 |
+
|
| 986 |
+
# ✅ Section Order Score (2 Points)
|
| 987 |
+
score_breakdown["section_order_score"] = -2 if section_order_suggestion else 0
|
| 988 |
+
|
| 989 |
+
# ✅ Projects Score (5 Points)
|
| 990 |
+
if "PROJECTS" in sections_text:
|
| 991 |
+
project_list = sections_text.get("PROJECTS", [])
|
| 992 |
+
project_count = len([x for x in project_list if "Description" in x])
|
| 993 |
+
if project_count <= 2:
|
| 994 |
+
score_breakdown["projects_score"] = 2
|
| 995 |
+
elif 2 < project_count <= 4:
|
| 996 |
+
score_breakdown["projects_score"] = 5
|
| 997 |
+
else:
|
| 998 |
+
score_breakdown["projects_score"] = 3
|
| 999 |
+
|
| 1000 |
+
# ✅ Certifications Score (7 Points)
|
| 1001 |
+
certifications = sections_text.get("CERTIFICATIONS & ACADEMIC ENDEAVOURS", [])
|
| 1002 |
+
num_certifications = len(certifications)
|
| 1003 |
+
if num_certifications == 0:
|
| 1004 |
+
score_breakdown["certifications_score"] = 0
|
| 1005 |
+
elif 0 < num_certifications <= 2:
|
| 1006 |
+
score_breakdown["certifications_score"] = 3
|
| 1007 |
+
elif 2 < num_certifications <= 4:
|
| 1008 |
+
score_breakdown["certifications_score"] = 5
|
| 1009 |
+
else:
|
| 1010 |
+
score_breakdown["certifications_score"] = 7
|
| 1011 |
+
|
| 1012 |
+
# ✅ Relevant Experience Score (5 Points)
|
| 1013 |
+
score_breakdown["relevant_experience_score"] = relevant_experience_score if relevant_experience_score is not None else 0
|
| 1014 |
+
|
| 1015 |
+
# ✅ Data Science Skills Score (5 Points)
|
| 1016 |
+
score_breakdown["ds_skills_score"] = self.calculate_ds_skills_score(skills)
|
| 1017 |
+
|
| 1018 |
+
# ✅ Extra URLs Bonus (5 Points)
|
| 1019 |
+
score_breakdown["extra_urls_bonus"] = self.calculate_extra_urls_bonus(sections_text)
|
| 1020 |
+
|
| 1021 |
+
# ✅ Summary Score (5 Points)
|
| 1022 |
+
profile_summary = sections_text.get("PROFILE SUMMARY", "")
|
| 1023 |
+
score_breakdown["summary_score"] = self.calculate_summary_score(profile_summary)
|
| 1024 |
+
|
| 1025 |
+
# ✅ Project Link Score (2 Points)
|
| 1026 |
+
project_links = self.extract_project_links(sections_text)
|
| 1027 |
+
projects_with_links = len(project_links)
|
| 1028 |
+
score_breakdown["project_link_score"] = self.calculate_project_link_score(projects_with_links)
|
| 1029 |
+
|
| 1030 |
+
return score_breakdown
|
| 1031 |
+
|
| 1032 |
+
def calculate_name_score(self,name):
|
| 1033 |
+
if not name:
|
| 1034 |
+
return 0
|
| 1035 |
+
|
| 1036 |
+
name_parts = name.split()
|
| 1037 |
+
num_parts = len(name_parts)
|
| 1038 |
+
|
| 1039 |
+
if num_parts == 0:
|
| 1040 |
+
return 0
|
| 1041 |
+
elif self.is_sentence_case(name):
|
| 1042 |
+
return 3
|
| 1043 |
+
elif self.is_present_name(name):
|
| 1044 |
+
return 1.5
|
| 1045 |
+
else:
|
| 1046 |
+
return 0
|
| 1047 |
+
|
| 1048 |
+
|
| 1049 |
+
def calculate_contact(self,contact_number):
|
| 1050 |
+
if contact_number and isinstance(contact_number, str):
|
| 1051 |
+
digits_only = re.sub(r'\D', '', contact_number)
|
| 1052 |
+
|
| 1053 |
+
if digits_only.startswith("91") and len(digits_only) > 10:
|
| 1054 |
+
digits_only = digits_only[2:] # Remove the first two characters ('91')
|
| 1055 |
+
|
| 1056 |
+
if len(digits_only) == 10 and digits_only[0] in "6789": # Check for valid Indian mobile numbers
|
| 1057 |
+
return 3
|
| 1058 |
+
else:
|
| 1059 |
+
return 0
|
| 1060 |
+
|
| 1061 |
+
def calculate_email(self,email):
|
| 1062 |
+
if email:
|
| 1063 |
+
if self.is_valid_email(email):
|
| 1064 |
+
return 3
|
| 1065 |
+
else:
|
| 1066 |
+
return 0
|
| 1067 |
+
|
| 1068 |
+
def calculate_github_url_score(self,github_url):
|
| 1069 |
+
if github_url:
|
| 1070 |
+
github_suggestion = self.is_valid_url(github_url)
|
| 1071 |
+
return 3 if not github_suggestion else 0
|
| 1072 |
+
return 0
|
| 1073 |
+
|
| 1074 |
+
def parse_text(self, path):
|
| 1075 |
+
logger = logging.getLogger(__name__)
|
| 1076 |
+
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
| 1077 |
+
resume_data = {}
|
| 1078 |
+
logger.debug('parsing text')
|
| 1079 |
+
text = self.extract_text_from_pdf(path)
|
| 1080 |
+
text1 = " ".join(text.split("\n"))
|
| 1081 |
+
skills_found = self.extract_skills_from_resume(text)
|
| 1082 |
+
found_keywords = self.extract_keyword_variations_from_resume(text)
|
| 1083 |
+
sections_text = self.segregate_sections(text)
|
| 1084 |
+
formatted_text = self.extract_and_format_sections(sections_text, Extract_sections)
|
| 1085 |
+
found_keyword_section = self.extract_keyword_variations_from_formatted_text(formatted_text)
|
| 1086 |
+
|
| 1087 |
+
parsed_sections = self.segregate_sections(text)
|
| 1088 |
+
projects = parsed_sections.get("PROJECTS", [])
|
| 1089 |
+
certifications = parsed_sections.get("CERTIFICATIONS & ACADEMIC ENDEAVOURS", [])
|
| 1090 |
+
projects_text = "\n".join(projects)
|
| 1091 |
+
certifications_text = "\n".join(certifications)
|
| 1092 |
+
found_imarticus_certification = self.check_imarticus_certifications(certifications_text)
|
| 1093 |
+
found_projects = self.check_common_projects(projects_text)
|
| 1094 |
+
|
| 1095 |
+
name, name_suggestion = self.extract_name(text)
|
| 1096 |
+
contact_number, contact_suggestion = self.extract_contact_number_from_resume(text)
|
| 1097 |
+
email, email_suggestion = self.extract_email_from_resume(path)
|
| 1098 |
+
github_urls = self.extract_github_urls_from_pdf(path)
|
| 1099 |
+
github_urls_suggestions = self.is_valid_url(github_urls)
|
| 1100 |
+
linkedin_urls = self.extract_linkedIn_urls_from_pdf(path)
|
| 1101 |
+
section_by_grammer_issues = self.grammar_issue_check(text, found_keyword_section, Extract_sections)
|
| 1102 |
+
|
| 1103 |
+
|
| 1104 |
+
domains = [
|
| 1105 |
+
r"hackerrank\.com", # Hackerrank
|
| 1106 |
+
r"leetcode\.com", # LeetCode
|
| 1107 |
+
r"medium\.com" # Medium
|
| 1108 |
+
]
|
| 1109 |
+
extra_urls = self.extract_extra_urls_pdf(path, domains)
|
| 1110 |
+
|
| 1111 |
+
education_order_suggestion, education_suggestion = self.chronological_order_check(sections_text, "ACADEMIC PROFILE")
|
| 1112 |
+
experience_order_suggestion, experience_suggestion = self.chronological_order_check(sections_text, "WORK EXPERIENCE")
|
| 1113 |
+
|
| 1114 |
+
headers = list(sections_text.keys())
|
| 1115 |
+
spelling_suggestions = self.check_spelling(headers, section_headers)
|
| 1116 |
+
|
| 1117 |
+
predefined_terms = [name, email]
|
| 1118 |
+
predefined_terms.extend(required_sections)
|
| 1119 |
+
text_properties = self.extract_text_properties(path, predefined_terms)
|
| 1120 |
+
grouped_properties = self.group_similar_fonts(text_properties)
|
| 1121 |
+
different_texts = self.identify_different_fonts_and_sizes(grouped_properties)
|
| 1122 |
+
|
| 1123 |
+
font_suggestions = []
|
| 1124 |
+
for item in different_texts:
|
| 1125 |
+
font_suggestion = f"Formatting issue at Page: {item['page_num']}, Text: {item['text']}, Reason: {item['reason']}, Found font size: {item['found_size']}, Found font name: {item['found_font_name']}"
|
| 1126 |
+
font_suggestions.append(font_suggestion)
|
| 1127 |
+
|
| 1128 |
+
missing_sections, sections_not_capitalized = self.extract_sections_from_resume(text)
|
| 1129 |
+
|
| 1130 |
+
linkedin_urls_suggestion = str()
|
| 1131 |
+
common_project = str()
|
| 1132 |
+
if not name:
|
| 1133 |
+
name_suggestion = "Please add name to the resume."
|
| 1134 |
+
if not contact_number:
|
| 1135 |
+
contact_suggestion = "Please add the contact number to the resume."
|
| 1136 |
+
if not email:
|
| 1137 |
+
email_suggestion = "Please add the email address to the resume."
|
| 1138 |
+
if not github_urls:
|
| 1139 |
+
github_urls_suggestions = "Add the github_urls to the resume."
|
| 1140 |
+
if not linkedin_urls:
|
| 1141 |
+
linkedin_urls_suggestion = "Add the linkedin_urls to the resume."
|
| 1142 |
+
if found_projects:
|
| 1143 |
+
common_project = "Common projects found in Projects section: "
|
| 1144 |
+
for project in found_projects:
|
| 1145 |
+
common_project += project
|
| 1146 |
+
|
| 1147 |
+
# Replace the existing project length suggestion code with:
|
| 1148 |
+
project_list = sections_text.get("PROJECTS", [])
|
| 1149 |
+
projects_with_description = [
|
| 1150 |
+
p for p in project_list
|
| 1151 |
+
if "description" in p.lower()
|
| 1152 |
+
]
|
| 1153 |
+
project_count = len(projects_with_description)
|
| 1154 |
+
|
| 1155 |
+
if project_count == 0:
|
| 1156 |
+
project_length_suggestion = "No projects found. Consider at least 2 projects."
|
| 1157 |
+
elif project_count == 1:
|
| 1158 |
+
project_length_suggestion = "Only 1 project found. Consider adding 1 more project."
|
| 1159 |
+
else:
|
| 1160 |
+
project_length_suggestion = f"{project_count} projects found."
|
| 1161 |
+
|
| 1162 |
+
# Store in resume data (keeps your existing URL extraction)
|
| 1163 |
+
resume_data["project_length_suggestion"] = project_length_suggestion
|
| 1164 |
+
|
| 1165 |
+
experience_text = sections_text.get("WORK EXPERIENCE", "") # ✅ Extract work experience section
|
| 1166 |
+
relevant_experience_score = self.calculate_relevant_experience_score(experience_text) # ✅ Calculate score
|
| 1167 |
+
|
| 1168 |
+
# ✅ Store in the final resume data output
|
| 1169 |
+
resume_data["relevant_experience_score"] = relevant_experience_score
|
| 1170 |
+
|
| 1171 |
+
section_grammar_check_issues = self.grammar_check(sections_text.keys())
|
| 1172 |
+
|
| 1173 |
+
recommended_blogs = random.sample(blog_articles, 2)
|
| 1174 |
+
recommended_youtube = random.sample(youtube_links, 2)
|
| 1175 |
+
|
| 1176 |
+
name_score = self.calculate_name_score(name)
|
| 1177 |
+
|
| 1178 |
+
contact_score = self.calculate_contact(contact_number)
|
| 1179 |
+
|
| 1180 |
+
email_score = self.calculate_email(email)
|
| 1181 |
+
|
| 1182 |
+
github_url_score = self.calculate_github_url_score(github_urls)
|
| 1183 |
+
|
| 1184 |
+
# Calculate imarticus_score
|
| 1185 |
+
imarticus_score = self.imarticus_review_score(
|
| 1186 |
+
name,
|
| 1187 |
+
contact_number,
|
| 1188 |
+
email,
|
| 1189 |
+
linkedin_urls,
|
| 1190 |
+
github_urls,
|
| 1191 |
+
missing_sections,
|
| 1192 |
+
sections_not_capitalized,
|
| 1193 |
+
common_projects=found_projects, # Ensure to pass found projects
|
| 1194 |
+
section_order_suggestion=experience_order_suggestion,
|
| 1195 |
+
sections_text=sections_text,
|
| 1196 |
+
skills=skills_found,
|
| 1197 |
+
relevant_experience_score=relevant_experience_score,
|
| 1198 |
+
#pdf_path=path
|
| 1199 |
+
#relevant_keywords_found=bool(found_keywords), # Convert to boolean
|
| 1200 |
+
#experience_orderly_arranged=experience_order_suggestion, # Pass orderly arrangement check
|
| 1201 |
+
#experience_section_present="WORK EXPERIENCE" in sections_text # Check if experience section is present
|
| 1202 |
+
)
|
| 1203 |
+
|
| 1204 |
+
|
| 1205 |
+
|
| 1206 |
+
# Populate resume data dictionary
|
| 1207 |
+
resume_data = {
|
| 1208 |
+
"name": name,
|
| 1209 |
+
"contact_number": contact_number,
|
| 1210 |
+
"email": email,
|
| 1211 |
+
"linkedin_urls": linkedin_urls,
|
| 1212 |
+
"experience_order_suggestion": experience_order_suggestion,
|
| 1213 |
+
"education_order_suggestion": education_order_suggestion,
|
| 1214 |
+
"grammer_issues_by_section": section_by_grammer_issues,
|
| 1215 |
+
"github_urls": github_urls,
|
| 1216 |
+
"skills": skills_found,
|
| 1217 |
+
"spelling_suggestions": spelling_suggestions,
|
| 1218 |
+
"found_keywords": found_keywords,
|
| 1219 |
+
"text": text,
|
| 1220 |
+
"font_suggestions": font_suggestions,
|
| 1221 |
+
"name_suggestion": name_suggestion,
|
| 1222 |
+
"contact_suggestion": contact_suggestion,
|
| 1223 |
+
"email_suggestion": email_suggestion,
|
| 1224 |
+
"github_urls_suggestions": github_urls_suggestions,
|
| 1225 |
+
"linkedin_urls_suggestion": "Add the LinkedIn URLs to the resume." if not linkedin_urls else "",
|
| 1226 |
+
"missing_sections": missing_sections,
|
| 1227 |
+
"common_projects": "Common projects found in Projects section: " + ", ".join(found_projects) if found_projects else "",
|
| 1228 |
+
"project_length_suggestion": project_length_suggestion,
|
| 1229 |
+
"section_grammar_check_issues": section_grammar_check_issues,
|
| 1230 |
+
"imarticus_score": imarticus_score, # Add the score to resume data
|
| 1231 |
+
"extra_urls": extra_urls,
|
| 1232 |
+
"certifications": {
|
| 1233 |
+
"found": found_imarticus_certification["found"],
|
| 1234 |
+
"message": found_imarticus_certification["message"],
|
| 1235 |
+
"text": certifications_text # Store extracted certification text
|
| 1236 |
+
},
|
| 1237 |
+
"recommended_blogs": recommended_blogs,
|
| 1238 |
+
"recommended_youtube_links": recommended_youtube,
|
| 1239 |
+
"name_score":name_score,
|
| 1240 |
+
"contact_score":contact_score,
|
| 1241 |
+
"email_score":email_score,
|
| 1242 |
+
"github_urls_score":github_url_score
|
| 1243 |
+
|
| 1244 |
+
}
|
| 1245 |
+
|
| 1246 |
+
# Additional checks and data additions
|
| 1247 |
+
if "WORK EXPERIENCE" in sections_text.keys() and "WORK EXPERIENCE" != list(sections_text.keys())[2]:
|
| 1248 |
+
section_order_suggestion = f"WORK EXPERIENCE should come before {list(sections_text.keys())[2]}"
|
| 1249 |
+
resume_data["section_order_suggestion"] = section_order_suggestion
|
| 1250 |
+
|
| 1251 |
+
missing_important_sections = self.check_missing_sections(resume_data)
|
| 1252 |
+
resume_data["basic_information_section"] = missing_important_sections or "Basic information is Found"
|
| 1253 |
+
|
| 1254 |
+
missing_skills = list(set(essential_skills) - set(skills_found))
|
| 1255 |
+
resume_data["missing_skills"] = missing_skills
|
| 1256 |
+
|
| 1257 |
+
found_keywords_count = len(resume_data["found_keywords"])
|
| 1258 |
+
num_keywords = len(keyword_variations)
|
| 1259 |
+
quality_mapping = {"Low": 0.2, "Medium": 0.5, "High": 0.8} # Assuming some quality mapping
|
| 1260 |
+
for quality, threshold in quality_mapping.items():
|
| 1261 |
+
if found_keywords_count < num_keywords * threshold:
|
| 1262 |
+
resume_data["quality"] = quality
|
| 1263 |
+
break
|
| 1264 |
+
|
| 1265 |
+
found_certification = "Imarticus certification found in Certifications section." if found_imarticus_certification else "No Imarticus certification found in Certifications section."
|
| 1266 |
+
resume_data["found_certification"] = found_certification
|
| 1267 |
+
|
| 1268 |
+
# Experience relevance check
|
| 1269 |
+
Extract_exp_sections = ['WORK EXPERIENCE']
|
| 1270 |
+
experience_text = self.extract_and_format_sections(sections_text, Extract_exp_sections)
|
| 1271 |
+
if experience_text:
|
| 1272 |
+
resume_data["work_experience_check"] = "Experience is relevant to Data science." if any(variation.lower() in experience_text.lower() for keyword, variations in keyword_variations.items() for variation in variations) else "Experience is not relevant to Data science."
|
| 1273 |
+
|
| 1274 |
+
return jsonify(resume_data)
|
src/reviewer.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import language_tool_python, json, re, logging
|
| 2 |
+
from flask import jsonify
|
| 3 |
+
tool = language_tool_python.LanguageTool('en-US')
|
| 4 |
+
# from pdfminer.high_level import extract_text
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class ResumeReviewer:
    """Thin wrapper around LanguageTool for grammar-checking resume text."""

    def __init__(self):
        pass

    def review(self):
        pass

    def grammar_check(self, parsed_resume):
        """Run LanguageTool over the parsed resume text.

        Args:
            parsed_resume (dict): Must contain a "text" key with the raw text.

        Returns:
            flask.Response: JSON with the original text, a list of grammar
            issues (message, replacements, context, rule id) and the
            auto-corrected text.
        """
        text = parsed_resume["text"]
        matches = tool.check(text)
        corrected_text = tool.correct(text)

        grammar_issues = [
            {
                "error": match.message,
                "suggested_correction": match.replacements,
                "context": match.context,
                "rule_id": match.ruleId,
            }
            for match in matches
        ]

        return jsonify({
            "original_text": text,
            "grammar_issues": grammar_issues,
            "corrected_text": corrected_text,
        })
|
src/submitter.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from .config import UPLOAD_FOLDER, ALLOWED_EXTENSIONS
|
| 3 |
+
from flask import render_template, redirect, flash, request, url_for
|
| 4 |
+
from werkzeug.utils import secure_filename
|
| 5 |
+
|
| 6 |
+
class ResumeSubmitter:
    """Handles resume upload: form rendering, validation and file saving."""

    def __init__(self):
        # exist_ok avoids the check-then-create race of the original
        # os.path.exists() + os.makedirs() pair.
        os.makedirs(UPLOAD_FOLDER, exist_ok=True)

    def allowed_file(self, filename):
        """Return True when *filename* has an extension in ALLOWED_EXTENSIONS."""
        return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

    def upload_form(self):
        """Render the resume-upload page."""
        return render_template("upload_resume.html")

    def upload_file(self):
        """Validate and persist the uploaded PDF.

        Returns:
            str: The saved file's path on success, otherwise a short
            human-readable error message.
        """
        if 'file' not in request.files:
            return 'No file part'

        file = request.files['file']
        if file.filename == '':
            return 'No selected file'

        if file and self.allowed_file(file.filename):
            # secure_filename strips path components / unsafe characters.
            filename = secure_filename(file.filename)
            file.save(os.path.join(UPLOAD_FOLDER, filename))
            flash('File successfully uploaded')
            # return file path
            return os.path.join(UPLOAD_FOLDER, filename)
        else:
            return "Allowed file types are PDF as of now"
|