ATllll committed on
Commit
7b56d8f
·
verified ·
1 Parent(s): 8a395b0

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +102 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,104 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import fitz # PyMuPDF
3
+ import pandas as pd
4
+ import nltk
5
+ import re
6
+ from sentence_transformers import SentenceTransformer, util
7
+ from nltk.corpus import stopwords
8
+ from nltk.tokenize import word_tokenize
9
+ import os
10
+
11
# Make sure the NLTK resources used for tokenizing and stopword filtering are
# available. Streamlit re-executes this script on every interaction, so each
# resource is looked up locally first and downloaded only when it is missing.
nltk_data_dir = os.path.join(os.path.expanduser("~"), "nltk_data")
if nltk_data_dir not in nltk.data.path:
    # Register the directory before the lookup so earlier downloads are found.
    nltk.data.path.append(nltk_data_dir)

# NOTE(review): recent NLTK releases may load the tokenizer from "punkt_tab"
# instead of "punkt" -- confirm against the pinned NLTK version.
for _resource_path, _package in (
    ("tokenizers/punkt", "punkt"),
    ("corpora/stopwords", "stopwords"),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package, download_dir=nltk_data_dir)
16
+
17
# Page configuration, title and intro text.
# The emoji and em dash below are restored from mis-encoded bytes.
st.set_page_config(page_title="BERT Resume Matcher", layout="wide")
st.title("🤖 AI Resume Matcher using BERT")
st.markdown(
    "Upload resumes and a job description — see similarity scores using "
    "**semantic NLP** and keyword matching."
)
21
+
22
+ # Function to extract text from a PDF
23
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        pdf_file: Binary file-like object exposing ``read()`` whose contents
            are a PDF document (the caller passes uploaded files).

    Returns:
        The text of all pages joined in page order; an empty string when no
        page yields any text.
    """
    # Open from the in-memory bytes and use the document as a context manager
    # so the PyMuPDF handle is released even if text extraction raises.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
29
+
30
+ # Function to extract cleaned keywords from text
31
# Filler words common in job postings that do not describe a skill.
_CUSTOM_STOPWORDS = frozenset({
    'basic', 'knowledge', 'either', 'ctc', 'good', 'lpa', 'per', 'month',
    'year', 'strong', 'skills', 'required', 'looking', 'fresher',
    'candidate', 'experience', 'preferred', 'concepts',
})


def extract_keywords(text):
    """Return the set of distinct keywords found in *text*.

    The text is lower-cased and tokenized with NLTK. A token is kept when it
    is purely alphabetic, longer than two characters, and is neither an
    English stopword nor one of the custom filler words.

    Args:
        text: Free-form text such as a job description or resume body.

    Returns:
        A set of lower-case keyword strings (empty when nothing qualifies).
    """
    ignored = _CUSTOM_STOPWORDS.union(stopwords.words('english'))
    # Tokens that pass isalpha() contain no non-word characters, so no extra
    # regex clean-up is needed before filtering.
    return {
        token
        for token in word_tokenize(text.lower())
        if token.isalpha() and len(token) > 2 and token not in ignored
    }
47
+
48
+ # Upload UI
49
+ uploaded_files = st.file_uploader("πŸ“€ Upload Resumes (PDF)", type="pdf", accept_multiple_files=True)
50
+ job_desc = st.text_area("πŸ“ Paste Job Description Here", height=200)
51
+
52
+ if st.button("πŸš€ Match Resumes"):
53
+ if uploaded_files and job_desc.strip():
54
+ resume_texts = []
55
+ resume_names = []
56
+
57
+ for file in uploaded_files:
58
+ try:
59
+ text = extract_text_from_pdf(file)
60
+ resume_texts.append(text)
61
+ resume_names.append(file.name)
62
+ except Exception as e:
63
+ st.error(f"❌ Error processing {file.name}: {str(e)}")
64
+
65
+ # Load Sentence-BERT model
66
+ with st.spinner("πŸ” Computing similarity..."):
67
+ model = SentenceTransformer('all-MiniLM-L6-v2')
68
+
69
+ # Encode job description and resumes
70
+ all_docs = [job_desc] + resume_texts
71
+ embeddings = model.encode(all_docs, convert_to_tensor=True)
72
+
73
+ job_embedding = embeddings[0]
74
+ resume_embeddings = embeddings[1:]
75
+ semantic_scores = util.cos_sim(job_embedding, resume_embeddings).flatten().tolist()
76
+
77
+ # Extract job keywords
78
+ job_keywords = extract_keywords(job_desc)
79
+ results = []
80
+
81
+ for i in range(len(resume_texts)):
82
+ resume_keywords = extract_keywords(resume_texts[i])
83
+ matched = job_keywords & resume_keywords
84
+ missing = job_keywords - resume_keywords
85
+ match_ratio = len(matched) / len(job_keywords) if job_keywords else 0
86
+
87
+ results.append({
88
+ "Resume": resume_names[i],
89
+ "Semantic Score (0–100)": round(semantic_scores[i] * 100, 2),
90
+ "Skill Match (%)": round(match_ratio * 100, 2),
91
+ "Matched Keywords": ", ".join(sorted(matched)),
92
+ "Missing Keywords": ", ".join(sorted(missing))
93
+ })
94
+
95
+ results_df = pd.DataFrame(results).sort_values(by="Semantic Score (0–100)", ascending=False).reset_index(drop=True)
96
+
97
+ st.success("βœ… Matching complete!")
98
+ st.dataframe(results_df)
99
 
100
+ # Download CSV
101
+ csv = results_df.to_csv(index=False).encode('utf-8')
102
+ st.download_button("πŸ“₯ Download Results as CSV", csv, "resume_match_results.csv", "text/csv")
103
+ else:
104
+ st.warning("⚠️ Please upload resumes and enter a job description before matching.")