Spaces:
Runtime error
Runtime error
| import os | |
# Base directory is the process's current working directory at import time,
# not the directory that contains this file.
BASE_DIR = os.getcwd()

# Uploads live in "<BASE_DIR>/../data/uploads"; the ".." component is kept
# verbatim (the path is not normalised).
_UPLOAD_PARTS = ('..', 'data', 'uploads')
UPLOAD_FOLDER = os.path.join(BASE_DIR, *_UPLOAD_PARTS)

# File extensions (lower-case, without the dot) listed as allowed.
ALLOWED_EXTENSIONS = {
    'pdf',
}
# Regular expressions matching http(s) links to each profile site, with an
# optional "www." prefix.  The trailing character class is repeated ("+") so a
# match spans the whole path up to the next whitespace, angle bracket or double
# quote; without the quantifier only the first path character was consumed and
# every extracted link was cut off right after the domain.
# NOTE(review): the "www." part is a capturing group, so re.findall() returns
# that group rather than the full URL.  It is left capturing so existing group
# numbering keeps working; use re.finditer() / match.group(0) for the full link.
linkedin_domain = r'https?://(www\.)?linkedin\.com/[^\s<>"]+'
github_domain = r'https?://(www\.)?github\.com/[^\s<>"]+'
kaggle_domain = r'https?://(www\.)?kaggle\.com/[^\s<>"]+'
medium_domain = r'https?://(www\.)?medium\.com/[^\s<>"]+'
hackerrank_domain = r'https?://(www\.)?hackerrank\.com/[^\s<>"]+'
leetcode_domain = r'https?://(www\.)?leetcode\.com/[^\s<>"]+'
# Upper-case section titles listed as required.
# NOTE(review): presumably compared against headings found in a resume --
# confirm against the caller.
required_sections = [
    "PROFILE SUMMARY",
    "ACADEMIC PROFILE",
    "TECHNICAL SKILLS",
    "CERTIFICATIONS",
    "PROJECTS",
    "CAREER OBJECTIVE",
]

# Field names grouped as "basic information".
basic_informations = [
    "name",
    "contact_number",
    "email",
    "linkedin_urls",
    "github_urls",
]
# Lower-case skill / keyword vocabulary (79 entries).  Order is preserved
# exactly as originally written.
data_science_skills = [
    "queries", "beautifulsoup", "ms excel", "mathematics", "selenium", "html",
    "analytical skills", "statsmodels", "ai", "improvement", "analyze",
    "metrics", "forecasting", "analytics", "analytical", "mysql",
    "postgresql", "database", "writing", "excel", "regulations",
    "algorithms", "scipy", "opencv", "reports", "eda", "jupyter",
    "presentations", "modeling", "audit", "technical skills", "schedule",
    "nltk", "iso", "xgboost", "segmentation", "github", "seaborn", "keras",
    "distribution", "investigation", "tableau", "probability", "analysis",
    "r", "technical", "programming", "web scraping", "research", "pandas",
    "statistical analysis", "numpy", "predictive analysis", "tensorflow",
    "hypothesis", "matplotlib", "scikit-learn", "information technology",
    "machine learning", "cloud", "streamlit", "mining", "python",
    "data analytics", "deep learning", "testing", "training",
    "clustering & classification", "data analysis", "engineering",
    "data visualization", "quantitative analysis", "statistics", "flask",
    "statistical modeling", "pytorch", "data mining", "aws", "sql",
]
# Skills singled out as essential, in display case.
# NOTE(review): all of these except "Statsmodels" also appear as keys of
# keyword_variations (which spells it "StatsModel") -- confirm how the two
# are joined by the caller.
essential_skills = [
    "Python",
    "SQL",
    "MySQL",
    "Tableau",
    "NumPy",
    "Statsmodels",
    "CNN",
    "ANN",
    "RNN",
    "Machine Learning",
    "Deep Learning",
    "SciKit Learn",
    "MS Excel",
    "Data Visualization",
    "Power BI",
    "Data Analysis",
]
# Verdict text -> numeric value, in ascending numeric order.
# NOTE(review): the values look like upper bounds of score bands, and the
# final 1.1 entry ("bad", above "excellent") looks like an out-of-range
# catch-all -- confirm against the code that consumes this mapping.
quality_mapping = {
    "Resume needs significant improvement": 0.15,
    "Resume needs improvement": 0.35,
    "Resume is average": 0.55,
    "Resume is good": 0.75,
    "Resume is very good": 0.90,
    "Resume is excellent": 1,
    "The resume is bad": 1.1,
}
# Canonical skill name -> list of spellings accepted for that skill.
# Keys and the order of variants are unchanged from the original table; only
# the following variant-level fixes were made:
#   * "problem-solving" was listed twice; the second copy is now the
#     space-separated "problem solving", matching the sibling soft-skill rows.
#   * the duplicated " GAN " variant is listed once.
#   * "Scrappy" additionally accepts the library's real spelling, "Scrapy".
#   * "StatsModel" additionally accepts "Statsmodels", the spelling used by
#     essential_skills.
# NOTE(review): short variants such as "SM", "ML", "DL", "AI", "Join" will hit
# inside longer words if the caller does plain substring matching -- confirm
# how these are matched before adding more.
keyword_variations = {
    "Python": ["Python", "Python_Language", "Python Programming"],
    "SQL": ["SQL", "SQL_Language", "Structured Query Language", "Structured_Query_Language"],
    "MySQL": ["MySQL", "MySQL_Database", "My_SQL", "My SQL"],
    "Pandas": ["Pandas", "Pandas_Library", "Pandas Data Analysis Library", "Pandas_Data Analysis_Library"],
    "R": [" R ", "R_Programming", "R Language", ",R ", " R,", ",R,"],
    "Matplotlib": ["Matplotlib", "Matplotlib_Library", "Matplotlib Plotting Library", "Matplotlib_Plotting_Library"],
    "Seaborn": ["Seaborn", "Seaborn_Library", "Seaborn Data Visualization Library"],
    "StatsModel": [
        "StatsModel", "StatsModel_Library", "StatsModel Statistical Library",
        "Statistical Modeling Library", "Statistics Modeling", "StatModelLib",
        "StatsMod", "SM Library", "SM", "Statsmodels",
    ],
    "Tableau": [
        "Tableau", "Tableau_Software", "Tableau Data Visualization",
        "Tableau Analytics", "Tableau BI Tool", "Tableau Visualization Software",
        "Tableau Data Analysis", "Tableau BI", "TableauBI",
    ],
    "TensorFlow": ["TensorFlow", "TensorFlow_Library"],
    "NumPy": ["NumPy", "NumPy_Library", "Numerical Computing Library"],
    "PyTorch": ["PyTorch", "PyTorch_Library"],
    "Keras": ["Keras", "Keras_Library"],
    "Plotly": ["Plotly", "Plotly_Library"],
    "RFM": ["RFM", "RFM_Analysis", "Recency Frequency Monetary Analysis"],
    "ANOVA": ["ANOVA", "ANOVA_Test", "Analysis of Variance", "Analysis_of_Variance"],
    "BeautifulSoup": ["BeautifulSoup", "BeautifulSoup_Library"],
    "Imputation": ["Imputation", "Data_Imputation", "Data Imputation", "Missing Data Imputation"],
    "Scrappy": ["Scrappy", "Scrappy_Library", "Scrapy", "Scrapy_Library"],
    "Selenium": ["Selenium", "Selenium_Library", "Selenium WebDriver", "Selenium Automation"],
    "TensorBoard": ["TensorBoard", "TensorBoard_Library", "TensorBoard Visualization Tool"],
    "SciPy": ["SciPy", "SciPy_Library", "Scientific Computing Library"],
    "OpenCV": ["OpenCV", "OpenCV_Library", "Computer Vision Library"],
    "NLTK": ["NLTK", "NLTK_Library", "Natural Language Toolkit"],
    "Hadoop": ["Hadoop", "Hadoop_Framework"],
    "Spark": ["Spark", "Spark_Framework", "Apache_Spark"],
    "spacy": ["spacy", "Spacy_Library"],
    "AdaBoost": ["AdaBoost", "Ada_Boost", "Ada Boost", "AdaBoost_Algorithm", "Adaptive Boosting", "Adaptive_Boosting"],
    "XGBoost": ["XGBoost", "XG_Boost", "XG Boost", "XGBoost_Algorithm", "Extreme Gradient Boosting"],
    "CNN": [" CNN ", "CNN,", ",CNN", "Convolutional Neural Network", "ConvNet", "CNN Algorithm", "CNN"],
    "ANN": [" ANN ", "ANN,", ",ANN", "Artificial Neural Network", "ANN Algorithm", "ANN"],
    "RNN": [" RNN ", "RNN,", ",RNN", "Recurrent Neural Network", "RNN Algorithm", "RNN"],
    "KNN": [
        " kNN ", "kNN,", ",kNN", "K-Nearest Neighbours", "K_Nearest_Neighbours",
        "K-Nearest-Neighbours", "K Nearest Neighbours", "KNN",
    ],
    "LSTM": ["LSTM", "Long Short-Term Memory", "LSTM Network", "LSTM Algorithm"],
    "GAN": [" GAN ", "GAN,", ",GAN", "Generative Adversarial Network", "GAN Algorithm"],
    "YOLO": ["YOLO", "You Only Look Once", "YOLO_Algorithm"],
    "Clustering": ["Clustering", "Clustering_Algorithms", "Data_Clustering"],
    "Classification": ["Classification", "Classification_Algorithms", "Data_Classification"],
    "Word2Vec": ["Word2Vec", "Word2Vec_Algorithm", "Word2Vec Word Embeddings", "word2vector"],
    "Tf-idf": [
        "Tf-idf", "Tf_idf", "Tf idf", "Term Frequency-Inverse Document Frequency",
        "Tf_idf_Algorithm", "Tf-idf_Algorithm",
    ],
    "Tokenization": ["Tokenization", "Text_Tokenization", "Word_Tokenization"],
    "Machine Learning": [
        "Machine Learning", "Machine_Learning", "Machine Learning Algorithms",
        "Machine_Learning_Algorithms", "ML",
    ],
    "Deep Learning": [
        "Deep Learning", "Deep_Learning", "Deep Learning Algorithms",
        "Deep_Learning_Algorithms", "DL",
    ],
    "SciKit Learn": [
        "SciKit Learn", "SciKit_Learn", "Sci Kit Learn", "SciKit-Learn",
        "Sci_Kit_Learn", "sklearn", "sk_learn",
    ],
    "Hugging Face": ["Hugging Face", "Hugging_Face", "HuggingFace"],
    "MS Excel": [
        "Excel", "MS Excel", "MSExcel", "MS_Excel", "Microsoft_Excel",
        "Microsoft Excel", "advance_excel", "advance_MS_excel", "advance_MSexcel",
        "advance excel", "Advance_Microsoft_excel", "Advance Microsoft excel",
    ],
    "Data Visualization": ["Data Visualization", "Data_Visualization", "Data_Viz", "Visualization"],
    "Power BI": ["Power BI", "Power_BI", "Microsoft_Power_BI", "Microsoft Power BI", "PowerBI"],
    "Transfer Learning": ["Transfer Learning", "Transfer_Learning"],
    "Linear Regression": ["Linear Regression", "Linear_Regression"],
    "Logistic Regression": ["Logistic Regression", "Logistic_Regression"],
    "Decision Tree": ["Decision Tree", "Decision_Tree"],
    "Random Forest": ["Random Forest", "Random_Forest"],
    "K-Means Clustering": [
        "K-Means Clustering", "K_Means_Clustering", "K-Means-Clustering",
        "K Means Clustering", "K-means", "k_means", "K-mean", "k_mean",
    ],
    "T-test": ["T-test", "T_Test", "T Test"],
    "Z-test": ["Z-test", "Z_Test", "Z Test"],
    "Hypothesis Testing": ["Hypothesis Testing", "Hypothesis_Testing"],
    "Chi-square": ["Chi-square", "Chi_Square", "Chi2"],
    "Normal Distribution": ["Normal Distribution", "Normal_Distribution"],
    "Correlation Analysis": ["Correlation Analysis", "Correlation_Analysis"],
    "Feature Scaling": ["Feature Scaling", "Feature_Scaling"],
    "Dimensionality Reduction": ["Dimensionality Reduction", "Dimensionality_Reduction"],
    "Jupyter Notebook": ["Jupyter Notebook", "Jupyter_Notebook"],
    "Google Colab": ["Google Colab", "Google_Colab"],
    "Data Analysis": ["Data Analysis", "Data_Analysis"],
    "Big Data": ["Big Data", "Big_Data"],
    "Support Vector Machines (SVM)": [
        "Support Vector Machines (SVM)", "Support_Vector_Machines", "SVM",
        "Support Vector Machines", "Support_Vector_Machines_SVM",
    ],
    "Natural Language Processing": ["Natural Language Processing", "Natural_Language_Processing", "NLP"],
    "Artificial Intelligence": ["Artificial Intelligence", "Artificial_Intelligence", " AI ", ",AI ", " AI,", "AI"],
    "Naive Bayes": ["Naive Bayes", "Naive_Bayes"],
    "Principal Component Analysis (PCA)": [
        "Principal Component Analysis (PCA)", "Principal_Component_Analysis",
        "Principal Component Analysis", "PCA",
    ],
    "Descriptive Statistics": ["Descriptive Statistics", "Descriptive_Statistics"],
    "Inferential Statistics": ["Inferential Statistics", "Inferential_Statistics"],
    "Gradient Boosting Machines (GBM)": [
        "Gradient Boosting Machines (GBM)", "Gradient_Boosting_Machines",
        "Gradient Boosting Machines", "GBM", "Gradient Boosting", "Gradient_Boosting",
    ],
    "Association Rule Learning (Apriori)": [
        "Association Rule Learning (Apriori)", "Association_Rule_Learning",
        "Association Rule Learning", "Apriori",
    ],
    "Hierarchical Clustering": ["Hierarchical Clustering", "Hierarchical_Clustering"],
    "Image Segmentation": ["Image Segmentation", "Image_Segmentation"],
    "Object Detection": ["Object Detection", "Object_Detection"],
    "Encoder Decoder": [
        "Encoder - Decoder", "Encoder_Decoder", "Encoder Decoder", "Encoder Decode",
        "Sequence-to-Sequence Models", "Seq2Seq Models", "Language Encoding",
        "Language Decoding", "Text Encoding", "Text Decoding",
        "Image Encoding", "Image Decoding", "Audio Encoding", "Audio Decoding",
        "Video Encoding", "Video Decoding", "Speech Encoding", "Speech Decoding",
        "Data Compression",
        "Data Encryption", "Data Decryption", "Encoder", "Decoder",
    ],
    "Word Embedding": ["Word Embedding", "Word_Embedding"],
    "Bag of Words": ["Bag of Words", "Bag_of_Words"],
    "Sentiment Analysis": ["Sentiment Analysis", "Sentiment_Analysis"],
    "Predictive Analysis": ["Predictive Analysis", "Predictive_Analysis"],
    "Statistical Modeling": ["Statistical Modeling", "Statistical_Modeling", "Statistical_Analysis", "Statistical Analysis"],
    "Data Preprocessing": ["Data Preprocessing", "Data_Preprocessing"],
    "Model Development": ["Model Development", "Model_Development"],
    "Time Series Analysis": ["Time Series Analysis", "Time_Series_Analysis", "TimeSeries", "TimeSeries_Analysis"],
    "Statistics Fundamentals": ["Statistics Fundamentals", "Statistics_Fundamentals"],
    "Advanced ML": ["Advanced ML", "Advanced_ML", "Advanced Machine Learning", "Advanced_Machine_Learning", "Advanced-ML"],
    "Advanced DL": ["Advanced DL", "Advanced_DL", "Advanced Deep Learning", "Advanced_Deep_Learning", "Advanced-DL"],
    "EDA": ["EDA", "Exploratory_Data_Analysis", "Exploratory Data Analysis"],
    "Data Mining": ["Data Mining", "Data_Mining"],
    "Outlier Detection": ["Outlier_Detection", "Outlier Detection"],
    "Missing Values Handling": ["Missing Values Handling", "Missing_Values_Handling", "Missing Values"],
    "Scaling Techniques": [
        "Scaling Techniques", "Feature Scaling", "Feature_Scaling", "Data Scaling",
        "Data_Scaling", "Data Normalization", "Data_Normalization",
        "Standardization", "Min-Max Scaling", "Min-Max_Scaling", "Normalization",
    ],
    "R2 and Adjusted R2": [
        "R2 Score", "R2_Score", "Adjusted_R2_Score", "Adjusted R2 Score",
        "R Squared Score", "R_Squared_Score", "R2 Accuracy", "R2_Accuracy",
        "Adjusted R2 Accuracy", "R2 Metric", "Adjusted R2 Metric",
    ],
    "Accuracy, Recall, F1 Score": [
        "Accuracy", "Classification_Accuracy", "Accuracy_Metrics", "Recall",
        "Precision", "Recall_Score", "F1 Score", "F1-Score", "F1_Metric",
        "F1_Score", "Classification_F1-Score",
    ],
    "MS Office": [
        "MS_Office", "MS Office", "Microsoft Office", "MS Word", "MS_Word",
        "Microsoft_Office", "Microsoft_Word", "Microsoft Word",
    ],
    "Subquery": ["Subquery", "Sub-query", "Nested Query", "Inner Query"],
    "SQL Join": ["SQL Join", "Join in SQL", "Join"],
    "Stemming": [
        "Stemming", "Stemming Algorithm", "Word Stemming", "Stemming Techniques",
        "Stemming in NLP", "Text_Stemming", "Text Stemming",
    ],
    "Stopwords": [
        "Stopwords", "Stop Words", "Common Words", "Text Stopwords",
        "Stopwords Removal", "Removing Stopwords", "Stopwords List", "Stopwords in NLP",
    ],
    "docker_variations": ["Docker Integration", "Docker", "Docker Automation", "Advanced Docker", "Advanced_Docker", "Docker Tools"],
    "jenkins_variations": ["Jenkins", "Jenkins CI/CD", "CI/CD", "Jenkins Automation", "Jenkins Pipeline", "Jenkins Plugins"],
    "prometheus_variations": ["Prometheus Monitoring", "Prometheus Metrics", "PromQL", "Prometheus Alerting"],
    "cicd_variations": ["Continuous Integration", "Continuous Deployment", "CI/CD Automation", "CI/CD Tools"],
    "flask_variations": ["Flask", "Flask Framework", "Flask RESTful", "Flask Deployment", "Flask Security"],
    "fastapi_variations": [
        "FastAPI", "FastAPI Framework", "FastAPI RESTful", "FastAPI_RESTful",
        "FastAPI Deployment", "FastAPI Tools", "FastAPI_Tools",
    ],
    "django_variations": ["Django Framework", "Django Web Development", "Django REST Framework", "Django Deployment", "Django"],
    "aws_variations": ["Amazon Web Services", "AWS Cloud", "AWS Services", "AWS Management", "AWS", "AWS_Cloud"],
    "statistics_variations": ["Statistical Analysis", "Descriptive Statistics", "Inferential Statistics", "Probability Theory"],
    "hypothesis_testing_variations": ["Null Hypothesis", "Alternative Hypothesis", "Significance Level", "Type I Error"],
    "smote_variations": [
        "Synthetic Minority Over-sampling Technique", "SMOTE Algorithm",
        "SMOTE Python", "SMOTE Applications", "SMOTE",
    ],
    "mlflow_variations": ["MLflow Framework", "MLflow Tracking", "MLflow Deployment", "MLflow Integration", "MLflow"],
    "packaging_variations": ["Software Packaging", "Package Management", "Python Packaging", "Packaging Best Practices"],
    "version_control_variations": ["Git Version Control", "Git Commands", "Git Workflow", "Git Collaboration", "Git"],
    "communication skills": ["communication_skills", "communication_skill", "communication skills"],
    "problem-solving": ["problem-solving", "problem_solving", "problem solving"],
    "decision making": ["decision making", "decision-making", "decision_making"],
}
# Section titles selected for extraction.
Extract_sections = [
    "CAREER OBJECTIVE",
    "PROFILE SUMMARY",
]

# Known upper-case section titles (20 entries), in their original order.
section_headers = [
    "CAREER OBJECTIVE",
    "PROFILE SUMMARY",
    "WORK EXPERIENCE",
    "EDUCATION",
    "ADDITIONAL INFORMATION AND HOBBIES",
    "ACADEMIC PROFILE",
    "PROJECTS",
    "CERTIFICATIONS",
    "SKILLS",
    "PERSONAL SKILLS",
    "PERSONAL INFORMATION",
    "REFERENCES",
    "EXTRACURRICULAR ACTIVITIES",
    "TECHNICAL SKILLS",
    "KEY SKILLS",
    "ADDITIONAL INFORMATION",
    "CERTIFICATIONS & ACADEMIC ENDEAVOURS",
    "AWARDS & ACCOLADES",
    "SOFTWARE SKILLS",
    "AWARDS",
]
# Project names/keywords listed as common (24 entries): short keywords first,
# then full project titles.  Order is preserved.
common_projects = [
    # short keywords
    "Titanic", "Iris", "MNIST", "COVID-19", "Bank Churn", "Spam",
    "Handwritten Digit", "Heart Disease", "House Price", "Diabetes",
    "Twitter", "Churn", "Wine Quality", "Loan",
    # full titles
    "Titanic Survival Prediction",
    "Iris Flower Classification",
    "House Price Prediction",
    "MNIST Handwritten Digit Recognition",
    "Customer Churn Prediction",
    "Sentiment Analysis of Movie Reviews",
    "Spam Email Detection",
    "Fake News Detection",
    "Image Classification with CNNs",
    "Stock Price Prediction",
]

# Project ideas offered as suggestions.
suggested_projects = [
    "Predicting Patient Readmissions in Hospitals",
    "Optimizing Ad Spend with Machine Learning Models",
    "Developing a Fake News Detection System",
    "Developing an AI Chatbot for Customer Service Automation",
    "Personalized Health Recommendations Using Wearable Data",
]
# Rule IDs and message fragments that should be ignored.
# NOTE(review): presumably applied to the output of a grammar/spell checker --
# confirm against the caller.
ignore_rule_ids = [
    "WHITESPACE_RULE",
]
ignore_error_keywords = [
    "repeated a whitespace",
]
# Reading material: blog posts and articles about writing a data-science resume.
blog_articles = [
    "https://www.dataquest.io/blog/how-data-science-resume-cv/",
    "https://medium.com/data-science-at-microsoft/writing-a-resume-for-a-data-science-role-345b98bdf80b",
    "https://medium.com/@alicechen.ai/resume-201-how-to-write-an-effective-data-science-resume-441cbe6c0932",
]

# YouTube video links.
youtube_links = [
    "https://youtu.be/Tt08KmFfIYQ?si=EdebdWUfbttysrfL",
    "https://youtu.be/R3abknwWX7k?si=m4EyviXgKDoPgIGr",
    "https://youtu.be/1-z9ptlBar4?si=lA7WgU4j4MFGjBZV",
    "https://youtu.be/pjqi_M3SPwY?si=5aRizcfpreKR9xUr",
    "https://youtu.be/ROfceyeD7f4?si=OTbrL7BUKSW1u2mt",
]