Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- app.py +118 -0
- parsed_resume.json +1 -0
- requirements.txt +9 -0
app.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from flask import Flask, jsonify, request, flash, redirect, url_for, render_template
|
| 3 |
+
from src.submitter import ResumeSubmitter
|
| 4 |
+
from src.reviewer import ResumeReviewer
|
| 5 |
+
from src.resume_parser import ResumeParser
|
| 6 |
+
from src.logging_config import setup_logging
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
# Flask application setup. Templates are resolved relative to this file so the
# app works no matter what the current working directory is.
app = Flask(__name__, template_folder=os.path.join(os.path.dirname(__file__), 'templates'))
# SECURITY: the session-signing key should never be a hard-coded literal.
# Read it from the environment, falling back to the historical value so
# existing deployments keep working — but set SECRET_KEY in production,
# since the default is publicly known.
app.secret_key = os.environ.get('SECRET_KEY', 'supersecretkey')
setup_logging()
|
| 12 |
+
|
| 13 |
+
@app.route('/v1/resumes/', methods=['POST', 'GET'])
def submit_resume():
    """Serve the resume upload form (GET) or accept an upload (POST).

    On a successful upload the client is redirected to the review endpoint
    with the stored file path; on failure a JSON error with a 400 status is
    returned.
    """
    # One submitter instance serves both branches (the original constructed
    # two separate instances, one per branch).
    submitter = ResumeSubmitter()

    if request.method != 'POST':
        return submitter.upload_form()

    result = submitter.upload_file()
    # upload_file() appears to return a saved-file path on success and an
    # error message otherwise — presumably; verify against ResumeSubmitter.
    if not os.path.exists(result):
        return jsonify(message=f"Failed to submit resume, {result}"), 400

    try:
        return redirect(url_for('get_reviews', path=result))
    except Exception as e:
        # url_for raises BuildError when the endpoint/arguments don't match.
        app.logger.error("Failed to redirect to /v1/reviews/: %s", str(e))
        return jsonify(message="Failed to redirect to reviews page"), 500
|
| 28 |
+
|
| 29 |
+
@app.route("/v1/reviews/<path:path>", methods=['POST', 'GET'])
def get_reviews(path):
    """Parse the resume at *path*, score it, run grammar checks, and render
    the review page.

    Args:
        path: Filesystem path of the uploaded resume (set by submit_resume).

    Returns:
        The rendered ``review_output.html`` template on success, or a
        ``(message, 500)`` tuple when the parser's response is not valid JSON.
    """
    app.logger.debug("Inside get_reviews")
    resume_parser = ResumeParser()
    resume_reviewer = ResumeReviewer()

    # Parse resume
    parsed_resume_response = resume_parser.parse_text(path)

    # Ensure JSON format
    try:
        parsed_resume_dict = json.loads(parsed_resume_response.data)
        # Was a bare print(); route diagnostics belong on the app logger.
        app.logger.debug("Parsed resume: %s", parsed_resume_dict)
    except json.JSONDecodeError:
        app.logger.error("Failed to decode JSON from the response")
        return "Invalid JSON response from parser", 500

    # Perform detailed scoring
    score_breakdown = resume_parser.imarticus_detailed_score(
        name=parsed_resume_dict.get("name"),
        contact_number=parsed_resume_dict.get("contact_number"),
        email=parsed_resume_dict.get("email"),
        linkedin_urls=parsed_resume_dict.get("linkedin_urls"),
        github_url=parsed_resume_dict.get("github_urls"),
        missing_sections=parsed_resume_dict.get("missing_sections"),
        sections_not_capitalized=parsed_resume_dict.get("sections_not_capitalized"),
        common_projects=parsed_resume_dict.get("common_projects"),
        section_order_suggestion=parsed_resume_dict.get("section_order_suggestion"),
        sections_text=parsed_resume_dict.get("sections_text"),
        skills=parsed_resume_dict.get("skills"),
        relevant_experience_score=parsed_resume_dict.get("relevant_experience_score", 0)
    )

    # Perform grammar and spelling checks
    section_grammar_check_issues, grammar_penalty = resume_reviewer.grammar_check(parsed_resume_dict)

    # Defensive defaults in case the reviewer returns None / unexpected types.
    if not isinstance(section_grammar_check_issues, dict):
        section_grammar_check_issues = {}
    if not isinstance(grammar_penalty, int):
        grammar_penalty = 0

    parsed_resume_dict["imarticus_score"] = parsed_resume_dict.get("imarticus_score", 0)  # key initialization
    parsed_resume_dict["imarticus_score"] += grammar_penalty  # apply penalty

    # Store grammar issues keyed by resume section
    parsed_resume_dict["section_grammar_check_issues"] = section_grammar_check_issues

    # Merge grammar & spelling issues into one flat list for the template.
    # Only the per-section issue dicts are needed, so iterate .values().
    all_issues = []
    for issues in section_grammar_check_issues.values():
        all_issues.extend(issues.get("grammar_issues", []))
        all_issues.extend(issues.get("spelling_errors", []))
    parsed_resume_dict["grammar_and_spelling_issues"] = all_issues

    # Add detailed score breakdown to parsed_resume_dict
    parsed_resume_dict.update(score_breakdown)

    # Ensure all scoring fields are present so the template can render them.
    required_score_fields = [
        "name_score", "contact_number_score", "email_score", "linkedin_url_score", "github_url_score",
        "missing_sections_score", "common_projects_score", "section_order_score", "projects_score",
        "certifications_score", "relevant_experience_score", "ds_skills_score", "extra_urls_bonus",
        "summary_score", "project_link_score"
    ]
    for field in required_score_fields:
        parsed_resume_dict.setdefault(field, 0)

    # Debugging logs (%-style args follow the logging convention)
    app.logger.debug(
        "Grammar & Spelling Issues: %s",
        json.dumps(parsed_resume_dict['grammar_and_spelling_issues'], indent=2),
    )

    # Save parsed results (optional) — useful for offline inspection.
    # NOTE(review): a fixed filename means concurrent requests overwrite each
    # other; acceptable only as a debugging aid.
    with open('parsed_resume.json', 'w', encoding='utf-8') as json_file:
        json.dump(parsed_resume_dict, json_file)

    # Pass updated data to template
    return render_template("review_output.html", parsed_resume=parsed_resume_dict)
| 108 |
+
|
| 109 |
+
@app.route("/v1/users/<int:id>", methods=['GET'])
def get_user(id):
    """Return a JSON acknowledgement for the requested user id."""
    confirmation = "User retrieved successfully for given ID {}".format(id)
    return jsonify(message=confirmation)
|
| 112 |
+
|
| 113 |
+
@app.route('/', methods=['GET'])
def greet():
    """Render the landing page."""
    return render_template('home_page.html')
|
| 116 |
+
|
| 117 |
+
if __name__ == '__main__':
    # Debug mode (interactive Werkzeug debugger + auto-reload) stays ON by
    # default for parity with the original behaviour, but can now be switched
    # off with FLASK_DEBUG=0 for production-like runs.
    # SECURITY: never expose debug=True on a publicly reachable interface —
    # the Werkzeug debugger allows arbitrary code execution.
    app.run(host='0.0.0.0', port=8080, debug=os.environ.get('FLASK_DEBUG', '1') != '0')
|
parsed_resume.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"basic_information_section": ["linkedin_urls", "github_urls"], "certifications": {"found": false, "message": "No Imarticus certification found in the provided text.", "text": ""}, "common_projects": "Common projects found in Projects section: Churn, Customer Churn Prediction", "contact_number": "91 9876543210", "contact_suggestion": "", "education_order_suggestion": "", "email": "johndoe@example.com", "email_suggestion": "", "experience_order_suggestion": "WORK EXPERIENCE section is in chronological order.", "extra_urls": {}, "font_suggestions": ["Formatting issue at Page: 1, Text: WORK, Reason: size not 12.0, Found font size: 14.0, Found font name: Helvetica", "Formatting issue at Page: 1, Text: EXPERIENCE, Reason: size not 12.0, Found font size: 14.0, Found font name: Helvetica", "Formatting issue at Page: 2, Text: EXTRA, Reason: size not 12.0, Found font size: 14.0, Found font name: Helvetica", "Formatting issue at Page: 2, Text: LINKS, Reason: size not 12.0, Found font size: 14.0, Found font name: Helvetica", "Formatting issue at Page: 2, Text: EDUCATION, Reason: size not 12.0, Found font size: 14.0, Found font name: Helvetica"], "found_certification": "Imarticus certification found in Certifications section.", "found_keywords": ["Python", "SQL", "Matplotlib", "Seaborn", "Tableau", "Hadoop", "Spark", "spacy", "XGBoost", "Machine Learning", "Deep Learning", "Visualization", "Power BI", "Random Forest", "Big Data", "NLP", " AI ", "Accuracy", "AWS", "Git"], "github_urls": null, "github_urls_suggestions": "add the github_urls to the resume.", "grammer_issues_by_section": {"CAREER OBJECTIVE": {"grammar_issues": [], "spelling_errors": []}, "PROFILE SUMMARY": {"grammar_issues": [{"context": "...KEYWORD_5}, and Big Data. Proficient in Python, SQL, {KEYWORD...", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}, {"context": "...Python, SQL, Spark, and Databricks. 
Strong background in...", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}, {"context": "...ORD_0}, SQL, Spark, and Databricks. Strong background in ETL...", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}, {"context": "... Spark, and Databricks. Strong background in ETL, Data Warehousing,...", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}, {"context": "...}, and Databricks. Strong background in ETL, Data Warehousing, and Cloud ...", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}, {"context": "...and Databricks. Strong background in ETL, Data Warehousing, and Cloud Comp...", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}, {"context": "...atabricks. Strong background in ETL, Data Warehousing, and Cloud Computing....", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}, {"context": "...cks. Strong background in ETL, Data Warehousing, and Cloud Computing. Pass...", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}, {"context": "...background in ETL, Data Warehousing, and Cloud Computing. 
Passionate about s...", "error": "It seems like there are too many consecutive spaces here.", "rule_id": "CONSECUTIVE_SPACES", "suggested_correction": [" "]}], "spelling_errors": []}}, "imarticus_score": 25, "linkedin_urls": null, "linkedin_urls_suggestion": "Add the LinkedIn URLs to the resume.", "missing_sections": ["ACADEMIC PROFILE", "TECHNICAL SKILLS", "CAREER OBJECTIVE"], "missing_skills": ["Data Analysis", "ANN", "RNN", "NumPy", "CNN", "Statsmodels", "MS Excel", "Data Visualization", "MySQL", "SciKit Learn"], "name": "John Doe", "name_suggestion": "", "project_length_suggestion": "No projects found. Consider at least 2 projects.", "quality": "Low", "recommended_blogs": ["https://www.dataquest.io/blog/how-data-science-resume-cv/", "https://medium.com/data-science-at-microsoft/writing-a-resume-for-a-data-science-role-345b98bdf80b"], "recommended_youtube_links": ["https://youtu.be/ROfceyeD7f4?si=OTbrL7BUKSW1u2mt", "https://youtu.be/pjqi_M3SPwY?si=5aRizcfpreKR9xUr"], "section_order_suggestion": "WORK EXPERIENCE should come before PROJECTS", "skills": ["Python", "SQL", "Tableau", "Machine Learning", "Deep Learning", "Power BI"], "spelling_suggestions": [], "text": "John Doe\n\n+91 9876543210 | johndoe@example.com\n\nLinkedIn: linkedin.com/in/johndoe | GitHub: github.com/johndoe\n\nPROFILE SUMMARY\n\nData Scientist with 5+ years of experience in Machine Learning, NLP, and Big Data. \n\nProficient in Python, SQL, Spark, and Databricks. Strong background in ETL, Data Warehousing,\n\nand Cloud Computing. \n\nPassionate about solving complex business problems using data-driven approaches.\n\nWORK EXPERIENCE\n\nSenior Data Scientist, XYZ Corp (Jan 2020 - Present)\n\n- Developed ML models for predictive analytics, reducing operational costs by 20%.\n\n- Built ETL pipelines using Spark & Databricks to process large-scale datasets.\n\n- Led a team of 5 data scientists and implemented hyperparameter tuning to improve model\n\naccuracy by 15%.\n\nData Analyst, ABC Ltd. 
(Jan 2017 - Dec 2019)\n\n- Designed data warehousing solutions for business intelligence, reducing report generation time by\n\n40%.\n\n- Created interactive dashboards using Power BI & Tableau for senior management.\n\n- Developed SQL-based ETL pipelines to consolidate data from multiple sources.\n\nPROJECTS\n\n1. Customer Churn Prediction (GitHub: github.com/johndoe/churn-prediction)\n\n- Built a predictive model using Random Forest & XGBoost with 85% accuracy.\n\n2. NLP Chatbot for Customer Support (GitHub: github.com/johndoe/nlp-chatbot)\n\n- Developed an AI-powered chatbot using spaCy & Transformer models.\n\n3. \n\nRecommendation \n\nSystem \n\nfor \n\nE-Commerce \n\n(GitHub:\n\ngithub.com/johndoe/recommendation-system)\n\n \n\f- Built a collaborative filtering model to personalize user experience.\n\n4. Fraud Detection in Banking (GitHub: github.com/johndoe/fraud-detection)\n\n- Used Anomaly Detection & Deep Learning to identify fraudulent transactions.\n\nSKILLS\n\nProgramming: Python, SQL, Scala, Spark\n\nMachine Learning: Supervised & Unsupervised Learning, NLP, Deep Learning\n\nData Engineering: ETL, Data Warehousing, Databricks, Snowflake\n\nCloud Platforms: AWS, Azure, GCP\n\nBig Data: Hadoop, PySpark, Kafka\n\nVisualization Tools: Power BI, Tableau, Matplotlib, Seaborn\n\nCERTIFICATIONS\n\nGoogle Data Science Professional Certificate\n\nAWS Certified Data Engineer\n\nIBM AI Engineering Professional Certificate\n\nCoursera Deep Learning Specialization\n\nImarticus Data Science Certification\n\nEXTRA LINKS\n\nKaggle: kaggle.com/johndoe\n\nLeetCode: leetcode.com/johndoe\n\nMedium Blog: medium.com/@johndoe\n\nHackerrank: hackerrank.com/johndoe\n\nEDUCATION\n\nM.S. 
in Data Science, Stanford University (2016-2018)\n\nB.Tech in Computer Science, IIT Delhi (2012-2016)\n\n\f", "work_experience_check": "Experience is relevant to Data science.", "section_grammar_check_issues": {"text": {"grammar_issues": [{"context": "...n Tools: Power BI, Tableau, Matplotlib, Seaborn CERTIFICATIONS Google Data Science Pr...", "error": "Possible spelling mistake found.", "rule_id": "MORFOLOGIK_RULE_EN_US", "suggested_correction": ["Seaborne", "Seaborg", "Sea born"]}, {"context": "...XTRA LINKS Kaggle: kaggle.com/johndoe LeetCode: leetcode.com/johndoe Medium Blog: med...", "error": "Possible spelling mistake found.", "rule_id": "MORFOLOGIK_RULE_EN_US", "suggested_correction": ["Electrode", "Encode", "Postcode", "Decode", "Leeched", "Bestrode", "Lestrade", "Geocode", "Re-encode", "Uuencode"]}], "spelling_errors": []}}, "grammar_and_spelling_issues": [{"context": "...n Tools: Power BI, Tableau, Matplotlib, Seaborn CERTIFICATIONS Google Data Science Pr...", "error": "Possible spelling mistake found.", "rule_id": "MORFOLOGIK_RULE_EN_US", "suggested_correction": ["Seaborne", "Seaborg", "Sea born"]}, {"context": "...XTRA LINKS Kaggle: kaggle.com/johndoe LeetCode: leetcode.com/johndoe Medium Blog: med...", "error": "Possible spelling mistake found.", "rule_id": "MORFOLOGIK_RULE_EN_US", "suggested_correction": ["Electrode", "Encode", "Postcode", "Decode", "Leeched", "Bestrode", "Lestrade", "Geocode", "Re-encode", "Uuencode"]}]}
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask
|
| 2 |
+
requests
|
| 3 |
+
pdfminer.six
|
| 4 |
+
PyMuPDF
|
| 5 |
+
spacy
|
| 6 |
+
language-tool-python
|
| 7 |
+
werkzeug
|
| 8 |
+
streamlit
|
| 9 |
+
install-jdk
|