# resume_parser/src/config.py
# Uploaded via Hugging Face ("Upload 6 files", commit 227f173, verified)
# by Imarticuslearning.
import os
# Base directory resolved at import time from the process's current working
# directory.  NOTE(review): this makes UPLOAD_FOLDER depend on where the app
# is launched from; a file-relative base (os.path.dirname(__file__)) may be
# what was intended — confirm against the deployment setup.
BASE_DIR = os.getcwd()
# Destination directory for uploaded resume files: '<cwd>/../data/uploads'.
UPLOAD_FOLDER = os.path.join(BASE_DIR, '..', 'data', 'uploads')
# Only PDF uploads are accepted.
ALLOWED_EXTENSIONS = {'pdf'}
# Regex patterns that locate profile URLs for each platform inside free text.
# The trailing character class is quantified with '+' so a match covers the
# whole URL up to the next whitespace, quote, or angle bracket; the original
# unquantified class stopped after a single character past the domain, so
# extracted URLs were truncated (e.g. 'https://linkedin.com/i').
linkedin_domain = r'https?://(www\.)?linkedin\.com/[^\s<>"]+'
github_domain = r'https?://(www\.)?github\.com/[^\s<>"]+'
kaggle_domain = r'https?://(www\.)?kaggle\.com/[^\s<>"]+'
medium_domain = r'https?://(www\.)?medium\.com/[^\s<>"]+'
hackerrank_domain = r'https?://(www\.)?hackerrank\.com/[^\s<>"]+'
leetcode_domain = r'https?://(www\.)?leetcode\.com/[^\s<>"]+'
# Section headings a complete resume is expected to contain.
required_sections = [
    'PROFILE SUMMARY',
    'ACADEMIC PROFILE',
    'TECHNICAL SKILLS',
    'CERTIFICATIONS',
    'PROJECTS',
    'CAREER OBJECTIVE',
]
# Contact / identity fields extracted from every resume.
basic_informations = [
    "name",
    "contact_number",
    "email",
    "linkedin_urls",
    "github_urls",
]
# Vocabulary of lower-cased data-science skill keywords looked up in resume
# text.  Original ordering is preserved unchanged.
data_science_skills = [
    'queries', 'beautifulsoup', 'ms excel', 'mathematics',
    'selenium', 'html', 'analytical skills', 'statsmodels',
    'ai', 'improvement', 'analyze', 'metrics',
    'forecasting', 'analytics', 'analytical', 'mysql',
    'postgresql', 'database', 'writing', 'excel',
    'regulations', 'algorithms', 'scipy', 'opencv',
    'reports', 'eda', 'jupyter', 'presentations',
    'modeling', 'audit', 'technical skills', 'schedule',
    'nltk', 'iso', 'xgboost', 'segmentation',
    'github', 'seaborn', 'keras', 'distribution',
    'investigation', 'tableau', 'probability', 'analysis',
    'r', 'technical', 'programming', 'web scraping',
    'research', 'pandas', 'statistical analysis', 'numpy',
    'predictive analysis', 'tensorflow', 'hypothesis', 'matplotlib',
    'scikit-learn', 'information technology', 'machine learning', 'cloud',
    'streamlit', 'mining', 'python', 'data analytics',
    'deep learning', 'testing', 'training', 'clustering & classification',
    'data analysis', 'engineering', 'data visualization', 'quantitative analysis',
    'statistics', 'flask', 'statistical modeling', 'pytorch',
    'data mining', 'aws', 'sql',
]
# Must-have skills checked against each resume's skills section.
essential_skills = [
    "Python", "SQL", "MySQL", "Tableau",
    "NumPy", "Statsmodels", "CNN", "ANN",
    "RNN", "Machine Learning", "Deep Learning", "SciKit Learn",
    "MS Excel", "Data Visualization", "Power BI", "Data Analysis",
]
# Maps a resume-quality verdict string to a numeric score; keys must match
# the verdict text produced elsewhere exactly.
quality_mapping = {
'Resume needs significant improvement': 0.15,
'Resume needs improvement': 0.35,
'Resume is average': 0.55,
'Resume is good': 0.75,
'Resume is very good': 0.90,
'Resume is excellent': 1,
# NOTE(review): 1.1 is above the 'excellent' score of 1, which is odd for a
# "bad" verdict — confirm the intended value against the scoring logic.
'The resume is bad': 1.1
}
# Canonical skill/tool name -> list of spelling and formatting variants that
# are searched for in resume text.  Several short-acronym variants are padded
# with spaces or commas (e.g. " R ", ",CNN") — presumably to force
# whole-token matches; confirm against the matching code before relying on it.
keyword_variations = {
"Python": ["Python", "Python_Language", "Python Programming"],
"SQL": ["SQL", "SQL_Language", "Structured Query Language", "Structured_Query_Language"],
"MySQL": ["MySQL", "MySQL_Database", "My_SQL", "My SQL"],
"Pandas": ["Pandas", "Pandas_Library", "Pandas Data Analysis Library","Pandas_Data Analysis_Library"],
"R": [" R ", "R_Programming", "R Language",",R "," R,", ",R,"],
"Matplotlib": ["Matplotlib", "Matplotlib_Library", "Matplotlib Plotting Library","Matplotlib_Plotting_Library"],
"Seaborn": ["Seaborn", "Seaborn_Library", "Seaborn Data Visualization Library"],
"StatsModel": ["StatsModel", "StatsModel_Library", "StatsModel Statistical Library", "Statistical Modeling Library", "Statistics Modeling", "StatModelLib", "StatsMod", "SM Library", "SM"],
"Tableau": ["Tableau", "Tableau_Software", "Tableau Data Visualization", "Tableau Analytics", "Tableau BI Tool", "Tableau Visualization Software", "Tableau Data Analysis", "Tableau BI","TableauBI"],
"TensorFlow": ["TensorFlow", "TensorFlow_Library"],
"NumPy": ["NumPy", "NumPy_Library", "Numerical Computing Library"],
"PyTorch": ["PyTorch", "PyTorch_Library"],
"Keras": ["Keras", "Keras_Library"],
"Plotly": ["Plotly", "Plotly_Library",],
"RFM": ["RFM", "RFM_Analysis", "Recency Frequency Monetary Analysis"],
"ANOVA": ["ANOVA", "ANOVA_Test", "Analysis of Variance","Analysis_of_Variance"],
"BeautifulSoup": ["BeautifulSoup", "BeautifulSoup_Library"],
"Imputation": ["Imputation", "Data_Imputation","Data Imputation", "Missing Data Imputation"],
"Scrappy": ["Scrappy", "Scrappy_Library"],
"Selenium": ["Selenium", "Selenium_Library", "Selenium WebDriver", "Selenium Automation"],
"TensorBoard": ["TensorBoard", "TensorBoard_Library", "TensorBoard Visualization Tool"],
"SciPy": ["SciPy", "SciPy_Library", "Scientific Computing Library"],
"OpenCV": ["OpenCV", "OpenCV_Library", "Computer Vision Library"],
"NLTK": ["NLTK", "NLTK_Library", "Natural Language Toolkit"],
"Hadoop": ["Hadoop", "Hadoop_Framework"],
"Spark": ["Spark", "Spark_Framework", "Apache_Spark"],
"spacy": ["spacy","Spacy_Library"],
"AdaBoost": ["AdaBoost","Ada_Boost","Ada Boost", "AdaBoost_Algorithm", "Adaptive Boosting","Adaptive_Boosting"],
"XGBoost": ["XGBoost","XG_Boost","XG Boost", "XGBoost_Algorithm", "Extreme Gradient Boosting"],
"CNN": [" CNN ", "CNN,", ",CNN", "Convolutional Neural Network", "ConvNet", "CNN Algorithm","CNN"],
"ANN": [" ANN ", "ANN,", ",ANN", "Artificial Neural Network", "ANN Algorithm","ANN"],
"RNN": [" RNN ", "RNN,", ",RNN", "Recurrent Neural Network", "RNN Algorithm","RNN"],
"KNN": [" kNN ", "kNN,", ",kNN","K-Nearest Neighbours", "K_Nearest_Neighbours", "K-Nearest-Neighbours", "K Nearest Neighbours", "KNN"],
"LSTM": ["LSTM", "Long Short-Term Memory", "LSTM Network", "LSTM Algorithm"],
# NOTE(review): " GAN " appears twice in the list below.
"GAN": [" GAN ", "GAN,", ",GAN", "Generative Adversarial Network", "GAN Algorithm"," GAN "],
"YOLO": ["YOLO", "You Only Look Once", "YOLO_Algorithm"],
"Clustering": ["Clustering", "Clustering_Algorithms", "Data_Clustering"],
"Classification": ["Classification", "Classification_Algorithms", "Data_Classification"],
"Word2Vec": ["Word2Vec", "Word2Vec_Algorithm", "Word2Vec Word Embeddings","word2vector"],
"Tf-idf": ["Tf-idf","Tf_idf","Tf idf", "Term Frequency-Inverse Document Frequency", "Tf_idf_Algorithm","Tf-idf_Algorithm"],
"Tokenization": ["Tokenization", "Text_Tokenization", "Word_Tokenization"],
"Machine Learning": ["Machine Learning", "Machine_Learning", "Machine Learning Algorithms", "Machine_Learning_Algorithms", "ML"],
"Deep Learning": ["Deep Learning", "Deep_Learning", "Deep Learning Algorithms", "Deep_Learning_Algorithms", "DL"],
"SciKit Learn": ["SciKit Learn", "SciKit_Learn", "Sci Kit Learn", "SciKit-Learn","Sci_Kit_Learn", "sklearn","sk_learn"],
"Hugging Face": ["Hugging Face", "Hugging_Face", "HuggingFace"],
"MS Excel": ["Excel", "MS Excel","MSExcel", "MS_Excel", "Microsoft_Excel", "Microsoft Excel", "advance_excel","advance_MS_excel","advance_MSexcel", "advance excel", "Advance_Microsoft_excel", "Advance Microsoft excel"],
"Data Visualization": ["Data Visualization", "Data_Visualization", "Data_Viz", "Visualization"],
"Power BI": ["Power BI", "Power_BI", "Microsoft_Power_BI", "Microsoft Power BI","PowerBI"],
"Transfer Learning": ["Transfer Learning", "Transfer_Learning"],
"Linear Regression": ["Linear Regression", "Linear_Regression"],
"Logistic Regression": ["Logistic Regression", "Logistic_Regression"],
"Decision Tree": ["Decision Tree", "Decision_Tree"],
"Random Forest": ["Random Forest", "Random_Forest"],
"K-Means Clustering": ["K-Means Clustering", "K_Means_Clustering", "K-Means-Clustering", "K Means Clustering", "K-means", "k_means","K-mean", "k_mean"],
"T-test": ["T-test", "T_Test", "T Test"],
"Z-test": ["Z-test", "Z_Test", "Z Test"],
"Hypothesis Testing": ["Hypothesis Testing", "Hypothesis_Testing"],
"Chi-square": ["Chi-square", "Chi_Square", "Chi2"],
"Normal Distribution": ["Normal Distribution", "Normal_Distribution"],
"Correlation Analysis": ["Correlation Analysis", "Correlation_Analysis"],
"Feature Scaling": ["Feature Scaling", "Feature_Scaling"],
"Dimensionality Reduction": ["Dimensionality Reduction", "Dimensionality_Reduction"],
"Jupyter Notebook": ["Jupyter Notebook", "Jupyter_Notebook"],
"Google Colab": ["Google Colab", "Google_Colab"],
"Data Analysis": ["Data Analysis", "Data_Analysis"],
"Big Data": ["Big Data", "Big_Data"],
"Support Vector Machines (SVM)": ["Support Vector Machines (SVM)", "Support_Vector_Machines", "SVM", "Support Vector Machines", "Support_Vector_Machines_SVM"],
"Natural Language Processing": ["Natural Language Processing", "Natural_Language_Processing", "NLP"],
"Artificial Intelligence": ["Artificial Intelligence", "Artificial_Intelligence"," AI ",",AI "," AI,","AI"],
"Naive Bayes": ["Naive Bayes", "Naive_Bayes"],
"Principal Component Analysis (PCA)": ["Principal Component Analysis (PCA)", "Principal_Component_Analysis", "Principal Component Analysis", "PCA"],
"Descriptive Statistics": ["Descriptive Statistics", "Descriptive_Statistics"],
"Inferential Statistics": ["Inferential Statistics", "Inferential_Statistics"],
"Gradient Boosting Machines (GBM)": ["Gradient Boosting Machines (GBM)", "Gradient_Boosting_Machines", "Gradient Boosting Machines", "GBM","Gradient Boosting","Gradient_Boosting"],
"Association Rule Learning (Apriori)": ["Association Rule Learning (Apriori)", "Association_Rule_Learning", "Association Rule Learning", "Apriori"],
"Hierarchical Clustering": ["Hierarchical Clustering", "Hierarchical_Clustering"],
"Image Segmentation": ["Image Segmentation", "Image_Segmentation"],
"Object Detection": ["Object Detection", "Object_Detection"],
"Encoder Decoder": ["Encoder - Decoder", "Encoder_Decoder","Encoder Decoder","Encoder Decode",
"Sequence-to-Sequence Models", "Seq2Seq Models", "Language Encoding", "Language Decoding", "Text Encoding", "Text Decoding",
"Image Encoding", "Image Decoding", "Audio Encoding", "Audio Decoding", "Video Encoding", "Video Decoding", "Speech Encoding", "Speech Decoding", "Data Compression",
"Data Encryption", "Data Decryption","Encoder","Decoder"],
"Word Embedding": ["Word Embedding", "Word_Embedding"],
"Bag of Words": ["Bag of Words", "Bag_of_Words"],
"Sentiment Analysis": ["Sentiment Analysis", "Sentiment_Analysis"],
"Predictive Analysis": ["Predictive Analysis", "Predictive_Analysis"],
"Statistical Modeling": ["Statistical Modeling", "Statistical_Modeling","Statistical_Analysis","Statistical Analysis"],
"Data Preprocessing": ["Data Preprocessing", "Data_Preprocessing"],
"Model Development": ["Model Development", "Model_Development"],
"Time Series Analysis": ["Time Series Analysis", "Time_Series_Analysis","TimeSeries","TimeSeries_Analysis"],
"Statistics Fundamentals": ["Statistics Fundamentals", "Statistics_Fundamentals"],
"Advanced ML": ["Advanced ML", "Advanced_ML", "Advanced Machine Learning", "Advanced_Machine_Learning", "Advanced-ML"],
"Advanced DL": ["Advanced DL", "Advanced_DL", "Advanced Deep Learning", "Advanced_Deep_Learning", "Advanced-DL"],
"EDA": ["EDA","Exploratory_Data_Analysis","Exploratory Data Analysis"],
"Data Mining":["Data Mining","Data_Mining"],
"Outlier Detection": ["Outlier_Detection","Outlier Detection"],
"Missing Values Handling": ["Missing Values Handling","Missing_Values_Handling","Missing Values"],
"Scaling Techniques": ["Scaling Techniques","Feature Scaling","Feature_Scaling","Data Scaling","Data_Scaling","Data Normalization","Data_Normalization","Standardization","Min-Max Scaling","Min-Max_Scaling","Normalization"],
"R2 and Adjusted R2": ["R2 Score","R2_Score","Adjusted_R2_Score","Adjusted R2 Score","R Squared Score","R_Squared_Score","R2 Accuracy","R2_Accuracy","Adjusted R2 Accuracy","R2 Metric","Adjusted R2 Metric"],
# NOTE(review): "F1_Score" is duplicated in the list below.
"Accuracy, Recall, F1 Score": ["Accuracy","Classification_Accuracy","Accuracy_Metrics","Recall","Precision","Recall_Score","F1 Score","F1-Score","F1_Metric","F1_Score","Classification_F1-Score"],
"MS Office": ["MS_Office","MS Office","Microsoft Office","MS Word","MS_Word","Microsoft_Office","Microsoft_Word","Microsoft Word"],
"Subquery": ["Subquery","Sub-query","Nested Query","Inner Query"],
"SQL Join": ["SQL Join","Join in SQL","Join"],
"Stemming": ["Stemming","Stemming Algorithm","Word Stemming","Stemming Techniques","Stemming in NLP","Text_Stemming","Text Stemming"],
"Stopwords": ["Stopwords","Stop Words","Common Words","Text Stopwords","Stopwords Removal","Removing Stopwords","Stopwords List","Stopwords in NLP"],
# NOTE(review): from here on keys switch to a "<tool>_variations" naming
# style instead of the plain canonical name used above — confirm consumers
# of this dict expect both key styles.
"docker_variations" : ["Docker Integration","Docker", "Docker Automation", "Advanced Docker","Advanced_Docker", "Docker Tools"],
"jenkins_variations" : ["Jenkins","Jenkins CI/CD","CI/CD", "Jenkins Automation", "Jenkins Pipeline", "Jenkins Plugins"],
"prometheus_variations" : ["Prometheus Monitoring", "Prometheus Metrics", "PromQL", "Prometheus Alerting"],
"cicd_variations" : ["Continuous Integration", "Continuous Deployment", "CI/CD Automation", "CI/CD Tools"],
"flask_variations" : ["Flask","Flask Framework", "Flask RESTful", "Flask Deployment", "Flask Security"],
"fastapi_variations" : ["FastAPI","FastAPI Framework", "FastAPI RESTful","FastAPI_RESTful", "FastAPI Deployment", "FastAPI Tools","FastAPI_Tools"],
"django_variations" : ["Django Framework", "Django Web Development", "Django REST Framework", "Django Deployment","Django"],
"aws_variations" : ["Amazon Web Services", "AWS Cloud", "AWS Services", "AWS Management","AWS","AWS_Cloud"],
"statistics_variations" : ["Statistical Analysis", "Descriptive Statistics", "Inferential Statistics", "Probability Theory"],
"hypothesis_testing_variations" : ["Null Hypothesis", "Alternative Hypothesis", "Significance Level", "Type I Error"],
"smote_variations" : ["Synthetic Minority Over-sampling Technique", "SMOTE Algorithm", "SMOTE Python", "SMOTE Applications","SMOTE"],
"mlflow_variations" : ["MLflow Framework", "MLflow Tracking", "MLflow Deployment", "MLflow Integration","MLflow"],
"packaging_variations" : ["Software Packaging", "Package Management", "Python Packaging", "Packaging Best Practices"],
"version_control_variations" : ["Git Version Control", "Git Commands", "Git Workflow", "Git Collaboration","Git"],
"communication skills" : ["communication_skills" , "communication_skill" ,"communication skills"],
# NOTE(review): "problem-solving" is duplicated in the list below.
"problem-solving": ["problem-solving","problem_solving", "problem-solving"],
"decision making" : ["decision making" , "decision-making","decision_making"]
}
# Sections whose raw text is pulled out for dedicated analysis.
Extract_sections = ["CAREER OBJECTIVE", "PROFILE SUMMARY"]
# Every heading treated as the start of a resume section when splitting
# the document into parts.
section_headers = [
    "CAREER OBJECTIVE",
    "PROFILE SUMMARY",
    "WORK EXPERIENCE",
    "EDUCATION",
    "ADDITIONAL INFORMATION AND HOBBIES",
    "ACADEMIC PROFILE",
    "PROJECTS",
    "CERTIFICATIONS",
    "SKILLS",
    "PERSONAL SKILLS",
    "PERSONAL INFORMATION",
    "REFERENCES",
    "EXTRACURRICULAR ACTIVITIES",
    "TECHNICAL SKILLS",
    "KEY SKILLS",
    "ADDITIONAL INFORMATION",
    "CERTIFICATIONS & ACADEMIC ENDEAVOURS",
    "AWARDS & ACCOLADES",
    "SOFTWARE SKILLS",
    "AWARDS",
]
# Project titles considered over-used on entry-level data-science resumes;
# both short keywords and full project titles are included.
common_projects = [
    "Titanic", "Iris", "MNIST", "COVID-19", "Bank Churn",
    "Spam", "Handwritten Digit", "Heart Disease", "House Price",
    "Diabetes", "Twitter", "Churn", "Wine Quality", "Loan",
    "Titanic Survival Prediction",
    "Iris Flower Classification",
    "House Price Prediction",
    "MNIST Handwritten Digit Recognition",
    "Customer Churn Prediction",
    "Sentiment Analysis of Movie Reviews",
    "Spam Email Detection",
    "Fake News Detection",
    "Image Classification with CNNs",
    "Stock Price Prediction",
]
# Less-common project ideas suggested in feedback when a resume lists only
# over-used projects.
suggested_projects = [
    "Predicting Patient Readmissions in Hospitals",
    "Optimizing Ad Spend with Machine Learning Models",
    "Developing a Fake News Detection System",
    "Developing an AI Chatbot for Customer Service Automation",
    "Personalized Health Recommendations Using Wearable Data",
]
# Grammar-check noise filters: findings whose rule ID or error message
# matches these entries are skipped when reporting language issues.
ignore_rule_ids = ['WHITESPACE_RULE']
ignore_error_keywords = ['repeated a whitespace']
# Curated articles on writing a strong data-science resume, surfaced to the
# user as improvement resources.
blog_articles = [
    "https://www.dataquest.io/blog/how-data-science-resume-cv/",
    "https://medium.com/data-science-at-microsoft/writing-a-resume-for-a-data-science-role-345b98bdf80b",
    "https://medium.com/@alicechen.ai/resume-201-how-to-write-an-effective-data-science-resume-441cbe6c0932",
]
# YouTube videos recommended alongside the written resources above.
youtube_links = [
    "https://youtu.be/Tt08KmFfIYQ?si=EdebdWUfbttysrfL",
    "https://youtu.be/R3abknwWX7k?si=m4EyviXgKDoPgIGr",
    "https://youtu.be/1-z9ptlBar4?si=lA7WgU4j4MFGjBZV",
    "https://youtu.be/pjqi_M3SPwY?si=5aRizcfpreKR9xUr",
    "https://youtu.be/ROfceyeD7f4?si=OTbrL7BUKSW1u2mt",
]