muddasser commited on
Commit
32f9c05
·
verified ·
1 Parent(s): 1736898

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +25 -0
  2. README.md +32 -0
  3. requirements.txt +16 -0
  4. resume.py +107 -0
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

ENV DEBIAN_FRONTEND=noninteractive

# Install essential build packages; --no-install-recommends keeps the image
# small, and the apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /home/user/app

# Copy only the dependency manifest first so the (slow) pip layer below is
# reused from cache when just the application code changes.
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy your code
COPY . .

# Expose port for Streamlit
EXPOSE 8501

# Run app
# NOTE(review): this commit adds resume.py, not app.py - confirm that app.py
# exists in the repository, otherwise point this command at resume.py.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: AI Resume Screening App
3
+ emoji: 🔍
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: 1.35.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # 🔍 AI Resume Screening App
13
+
14
+ This project is an AI-powered resume screening tool built with Python, Streamlit, spaCy, and scikit-learn. It processes PDF and DOCX resumes, extracts skills and experience, and computes a match score against a job description.
15
+
16
+ ## 🚀 Features
17
+ - Extracts text from PDF and DOCX resumes.
18
+ - Identifies user-defined skills and estimates years of experience.
19
+ - Computes a cosine similarity score between resume and job description.
20
+ - Streamlit UI for deployment on Hugging Face Spaces.
21
+ - Runs on CPU.
22
+
23
+ ## 📦 Setup on Hugging Face Spaces
24
+ 1. Upload resumes (PDF/DOCX) to `/data/resumes` via the Files tab (optional, as Streamlit handles uploads).
25
+ 2. Access the Streamlit interface and enter a job description and required skills (comma-separated).
26
+ 3. Upload resumes and view screening results.
27
+ 4. Check `/data/resumes` for persistent storage of uploaded files.
28
+
29
+ ## 📋 Requirements
30
+ Install dependencies using:
31
+ ```bash
32
+ pip install -r requirements.txt
+ ```
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core ML
2
+ numpy
3
+ scipy
4
+ pandas
5
+ scikit-learn
6
+
7
+ # Text Processing
8
+ spacy
9
+ pdfplumber
10
+ docx2txt
11
+
12
+ # Web Framework
13
+ streamlit==1.35.0
14
+
15
+ # spaCy English Model
16
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
resume.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pdfplumber
3
+ import docx2txt
4
+ import spacy
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ import os
8
+
9
+ # Load the English NLP model from spaCy
10
@st.cache_resource
def load_spacy_model():
    """Load and return spaCy's ``en_core_web_sm`` English pipeline.

    The function is wrapped in ``st.cache_resource`` so the loaded pipeline
    object can be shared instead of being rebuilt on each call.
    """
    english_pipeline = spacy.load('en_core_web_sm')
    return english_pipeline


# Module-level pipeline instance used by the extraction helpers below.
nlp = load_spacy_model()
15
+
16
+ # Function to extract text from a PDF file
17
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        The text of all pages joined with newlines. Pages for which
        ``extract_text()`` returns nothing are skipped, so a PDF with no
        extractable text yields an empty string.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Extract while the document is still open.
        page_texts = [page.extract_text() for page in pdf.pages]
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next (which would break skill and keyword matching).
    return '\n'.join(page_text for page_text in page_texts if page_text)
25
+
26
+ # Function to extract text from a DOCX file
27
def extract_text_from_docx(docx_file):
    """Return the text of a DOCX file as produced by ``docx2txt.process``."""
    document_text = docx2txt.process(docx_file)
    return document_text
29
+
30
+ # Function to extract user-defined skills from resume text
31
def extract_skills(text, user_skills):
    """Return the user-defined skills that appear in the resume text.

    Matching is case-insensitive and works on whole words/phrases, so
    "java" does not match "javascript" and a one-letter skill such as "r"
    does not match every document. Whitespace inside a multi-word skill
    matches any run of whitespace in the text (e.g. a line break inserted by
    PDF extraction).

    Args:
        text: Resume text to search.
        user_skills: Iterable of skill names; surrounding whitespace and case
            are ignored, blank entries are skipped.

    Returns:
        A list of the matched skills, lower-cased and stripped, without
        duplicates, in the order they first occur in ``user_skills``.
    """
    import re

    haystack = text.lower()
    matched = []
    for raw_skill in user_skills:
        skill = raw_skill.strip().lower()
        # A blank skill would otherwise "match" every text.
        if not skill or skill in matched:
            continue
        phrase = r'\s+'.join(re.escape(word) for word in skill.split())
        # Look-arounds instead of \b so skills ending in punctuation
        # (e.g. "c++", "c#") can still be matched.
        if re.search(r'(?<!\w)' + phrase + r'(?!\w)', haystack):
            matched.append(skill)
    return matched
35
+
36
+ # Function to estimate years of experience from dates mentioned
37
def extract_experience(text):
    """Estimate years of experience from date phrases in the resume text.

    The text is run through the module-level spaCy pipeline ``nlp``; every
    entity labelled ``DATE`` is scanned for "<number> year(s)" phrases,
    including forms such as "5+ years", "over 5 years" and "3-5 years".

    Args:
        text: Resume text to analyse.

    Returns:
        The largest year count found, as an int (fractional values are
        truncated), or 0 when no such phrase is present.
    """
    import re

    year_phrase = re.compile(r'(\d+(?:\.\d+)?)\s*\+?\s*years?\b')
    year_counts = []
    for ent in nlp(text).ents:
        if ent.label_ != 'DATE':
            continue
        # Parse the number directly in front of "year(s)" rather than
        # assuming it is the first token of the entity.
        for found in year_phrase.finditer(ent.text.lower()):
            year_counts.append(int(float(found.group(1))))
    return max(year_counts, default=0)
49
+
50
+ # Function to compute a similarity score between resume and job description
51
def match_score(resume_text, job_description):
    """Compute a TF-IDF cosine-similarity score between resume and job text.

    Args:
        resume_text: Text extracted from the resume.
        job_description: Job description text to compare against.

    Returns:
        The cosine similarity of the two TF-IDF vectors as a percentage
        rounded to two decimals. Returns 0.0 when either text is empty or
        blank, or when no terms remain after English stop-word removal.
    """
    # A resume with no extractable text (e.g. a scanned PDF) must score 0
    # instead of crashing the vectorizer.
    if not (resume_text and resume_text.strip()):
        return 0.0
    if not (job_description and job_description.strip()):
        return 0.0

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform([resume_text, job_description])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when the
        # documents contain only stop words or non-token characters.
        return 0.0

    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return round(float(similarity[0][0]) * 100, 2)
57
+
58
+ # -------- Streamlit Frontend Starts Here -------- #
59
+
60
st.title("🔍 AI Resume Screening App")

# Text area for job description
job_description = st.text_area("📄 Paste the Job Description Below:", height=200)

# Text input for skills (comma-separated)
skills_input = st.text_input("🛠️ Enter Required Skills (comma-separated):", placeholder="e.g., Python, SQL, Machine Learning")

# File uploader for multiple resumes
uploaded_files = st.file_uploader("📂 Upload Resume Files (PDF/DOCX)", type=['pdf', 'docx'], accept_multiple_files=True)

# Main logic to process resumes: runs only once all three inputs are provided
if uploaded_files and job_description and skills_input:
    # Parse user-entered skills, dropping blank entries such as "a,,b"
    user_skills = [skill.strip() for skill in skills_input.split(',') if skill.strip()]

    if not user_skills:
        st.warning("⚠️ Please enter at least one skill.")
    else:
        st.markdown("### 🔎 Screening Results")

        # Save uploaded files to /data for persistent storage
        os.makedirs('/data/resumes', exist_ok=True)
        for resume in uploaded_files:
            # The file name comes from the client: keep only its final path
            # component so it cannot escape /data/resumes.
            safe_name = os.path.basename(resume.name.replace('\\', '/'))
            extension = os.path.splitext(safe_name)[1].lower()

            # Validate the type (case-insensitively) before touching disk
            if extension not in ('.pdf', '.docx'):
                st.warning(f"Unsupported file type: {resume.name}")
                continue

            resume_path = os.path.join('/data/resumes', safe_name)
            try:
                with open(resume_path, 'wb') as f:
                    # getvalue() returns the whole upload regardless of the
                    # current read position of the buffer.
                    f.write(resume.getvalue())

                # Extract text
                if extension == '.pdf':
                    resume_text = extract_text_from_pdf(resume_path)
                else:
                    resume_text = extract_text_from_docx(resume_path)

                # Extract information
                skills = extract_skills(resume_text, user_skills)
                experience = extract_experience(resume_text)
                score = match_score(resume_text, job_description)
            except Exception as exc:
                # UI boundary: report the failure for this file and keep
                # screening the remaining resumes.
                st.error(f"Could not process {resume.name}: {exc}")
                continue

            # Display results
            st.subheader(f"👤 Candidate: {resume.name}")
            st.write(f"✅ **Skills Matched**: {', '.join(skills) if skills else 'None'}")
            st.write(f"🧠 **Estimated Experience**: {experience} year(s)")
            st.write(f"📊 **Match Score**: {score}%")
            st.markdown("---")