bangaboy committed on
Commit
76a67a2
·
verified ·
1 Parent(s): 3c1d02d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +245 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,247 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import google.generativeai as genai
3
+ from PIL import Image
4
+ import fitz # PyMuPDF
5
+ from docx import Document
6
+ import json
7
+ from pathlib import Path
8
+ from datetime import datetime
9
+ import re
10
+ import pytesseract
11
+ import io
12
 
13
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        pdf_file: Binary file-like object exposing ``read()``; its full
            content is read into memory and handed to PyMuPDF.

    Returns:
        The text of all pages joined with newlines, or ``""`` if reading or
        parsing fails (the error is shown to the user via ``st.error``).
    """
    try:
        pdf_bytes = pdf_file.read()
        # Use the document as a context manager so it is closed even when
        # text extraction fails part-way through the pages.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception as e:
        st.error(f"Error in PDF extraction: {str(e)}")
        return ""
26
+
27
def extract_text_from_docx(docx_file):
    """Return the text of a DOCX upload, one paragraph per line.

    Args:
        docx_file: Path or file-like object passed straight to
            ``docx.Document``.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs`` joined
        with newlines, or ``""`` if the document cannot be opened or read
        (the error is shown to the user via ``st.error``).
    """
    try:
        paragraphs = Document(docx_file).paragraphs
        return "\n".join([para.text for para in paragraphs])
    except Exception as err:
        st.error(f"Error in DOCX extraction: {str(err)}")
        return ""
38
+
39
def parse_date(date_str):
    """Parse a resume-style date string into a ``datetime``.

    Recognised inputs, in order of precedence:

    * ``present`` / ``current`` / ``now`` (any case) -> the current time;
    * year only, month-name + year, or numeric month/year combinations
      (see ``formats`` below) -> the first day of that month or year;
    * anything else containing a four-digit year from 1900-2099 -> 1 January
      of that year.

    Args:
        date_str: The text to parse. Surrounding whitespace is ignored.

    Returns:
        A naive ``datetime``, or ``None`` when the input is not a string or
        no date can be recognised.
    """
    if not isinstance(date_str, str):
        return None

    # Strip before any comparison so padded values like " Present " match.
    date_str = date_str.strip()
    if not date_str:
        return None

    # Handle open-ended roles ('Present', 'Current', ...).
    if date_str.lower() in ('present', 'current', 'now'):
        return datetime.now()

    formats = (
        '%Y', '%b %Y', '%B %Y', '%m/%Y', '%m-%Y',
        '%Y/%m', '%Y-%m',
    )
    for fmt in formats:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue

    # Last resort: pick out a bare year (e.g. "Summer 1998", "Sept 2020").
    year_match = re.search(r'\b(?:19|20)\d{2}\b', date_str)
    if year_match:
        return datetime.strptime(year_match.group(), '%Y')

    return None
66
+
67
def calculate_experience(work_history):
    """Calculate total years of experience from a work history.

    Each job's ``duration`` string is split into a start and an end, both
    are parsed with ``parse_date``, and the month-granular difference is
    added to the total. Overlapping jobs are not merged; their durations are
    simply summed.

    Args:
        work_history: Iterable of dicts with an optional ``'duration'`` key
            such as ``"Jan 2020 - Present"``, ``"2018-2020"`` or
            ``"2018 to 2020"``. ``None`` is treated as an empty history, and
            entries that are not dicts or have no usable duration are skipped.

    Returns:
        Total experience in years, rounded to one decimal place.
    """
    total_experience = 0

    for job in work_history or []:
        duration = job.get('duration') if isinstance(job, dict) else None
        if not duration or not isinstance(duration, str):
            continue
        duration = duration.strip()

        # Prefer a separator surrounded by whitespace so hyphenated dates
        # ("01-2020 - 03-2021", "2020-01 to 2021-03") stay intact. Hyphen,
        # en dash and em dash are all accepted as range separators.
        parts = re.split(r'\s+[-\u2013\u2014]\s+|\s+to\s+', duration)
        if len(parts) != 2:
            # Fall back to a bare dash for compact ranges such as "2018-2020".
            parts = re.split(r'\s*[-\u2013\u2014]\s*|\s+to\s+', duration)
        if len(parts) != 2:
            continue

        start_date = parse_date(parts[0])
        end_date = parse_date(parts[1])

        if start_date and end_date:
            years = (end_date.year - start_date.year) + \
                (end_date.month - start_date.month) / 12
            # Ignore reversed ranges instead of subtracting experience.
            total_experience += max(0, years)

    return round(total_experience, 1)
90
+
91
# Instruction sent to Gemini ahead of the resume text; the reply is expected
# to contain a single JSON object in the shape described here.
_RESUME_PROMPT = """Extract the following information from this resume:
1. Summarize the following resume in 100 words, focusing on key skills, experience, and qualifications
2. Full Name
3. Email Address
4. Phone Number
5. Education History (including degree, institution, graduation year, and field of study)
6. Companies worked at with positions and EXACT duration (e.g., "Jan 2020 - Present" or "2018-2020")
7. Skills
8. LinkedIn Profile URL

Return the information in this JSON format:
{
"summary": "",
"name": "",
"email": "",
"phone": "",
"education": [
{
"degree": "",
"institution": "",
"year": "",
"field": "",
"gpa": ""
}
],
"work_experience": [
{
"company": "",
"position": "",
"duration": ""
}
],
"skills": [],
"linkedin": ""
}
For skills include tools and technologies in output if present any in resume.
For work experience durations, please specify exact dates in format: "MMM YYYY - MMM YYYY" or "YYYY - Present" , please return in one order either in ascending or descending.
Only return the JSON object, nothing else. If any field is not found, leave it empty."""


def _read_resume_text(file_uploaded):
    """Return the upload's text based on its extension, or None if unsupported."""
    file_extension = Path(file_uploaded.name).suffix.lower()

    if file_extension == '.pdf':
        return extract_text_from_pdf(file_uploaded)
    if file_extension in ('.docx', '.doc'):
        return extract_text_from_docx(file_uploaded)
    if file_extension in ('.jpg', '.jpeg', '.png'):
        # Images carry no text layer, so run OCR on them.
        return pytesseract.image_to_string(Image.open(file_uploaded))

    st.error(f"Unsupported file format: {file_extension}")
    return None


def parse_resume(file_uploaded, api_key):
    """Parse an uploaded resume with Gemini and return the extracted fields.

    The file's text is extracted according to its extension (PDF, DOCX/DOC,
    or image via OCR), sent to the ``gemini-1.5-flash`` model together with
    the extraction prompt, and the JSON object found in the reply is decoded.

    Args:
        file_uploaded: Uploaded file object with a ``name`` attribute, as
            accepted by the extraction helpers.
        api_key: Gemini API key used to configure ``google.generativeai``.

    Returns:
        The decoded dict with an added ``'total_years_experience'`` entry, or
        ``None`` when the format is unsupported, no text could be extracted,
        the reply is not a JSON object, or any other error occurs. Every
        failure is reported to the user through ``st.error``.
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-1.5-flash')

    try:
        text_content = _read_resume_text(file_uploaded)
        if text_content is None:
            return None
        # Do not spend an API call (and risk an invented result) when there
        # is nothing to analyse, e.g. a scanned PDF without a text layer.
        if not text_content.strip():
            st.error("No text could be extracted from the uploaded file.")
            return None

        response = model.generate_content(f"{_RESUME_PROMPT}\n\nResume Text:\n{text_content}")

        # The model may wrap the object in prose or code fences, so keep only
        # the span from the first '{' to the last '}'.
        response_text = response.text
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        json_str = response_text[json_start:json_end]

        try:
            result = json.loads(json_str)
        except json.JSONDecodeError as e:
            st.error(f"Error parsing response: {str(e)}")
            return None

        if not isinstance(result, dict):
            st.error("Error parsing response: expected a JSON object")
            return None

        total_exp = calculate_experience(result.get('work_experience', []))
        result['total_years_experience'] = total_exp
        return result

    except Exception as e:
        st.error(f"Error processing resume: {str(e)}")
        return None
169
+
170
def format_education(edu):
    """Format one education entry as a single display line.

    Args:
        edu: Dict with optional ``'degree'``, ``'field'``, ``'institution'``,
            ``'year'`` and ``'gpa'`` entries. Missing or empty entries are
            omitted from the output.

    Returns:
        A string such as ``"BSc in CS from MIT (2020) - GPA: 3.9"`` built from
        whichever parts are present; ``""`` when none are.
    """
    parts = []
    if edu.get('degree'):
        parts.append(edu['degree'])
    if edu.get('field'):
        parts.append(f"in {edu['field']}")
    if edu.get('institution'):
        parts.append(f"from {edu['institution']}")
    if edu.get('year'):
        parts.append(f"({edu['year']})")

    # The value comes from model-generated JSON and may be a number rather
    # than a string, so convert it before stripping whitespace.
    gpa = str(edu.get('gpa') or '').strip()
    if gpa:
        parts.append(f"- GPA: {gpa}")

    return " ".join(parts)
184
+
185
def main():
    """Render the resume-parser page.

    Shows the title, obtains the Gemini API key (from Streamlit secrets if
    present, otherwise from a password input), offers a file uploader, and,
    once both a file and a key are available, runs ``parse_resume`` and
    displays the extracted summary, contact details, total experience,
    education, work experience, skills and LinkedIn profile.
    """
    st.title("Resume Parser")
    st.write("Upload a resume (PDF, DOCX, or Image) to extract information")

    # Get API key from secrets or user input
    if "GEMINI_API_KEY" in st.secrets:
        api_key = st.secrets["GEMINI_API_KEY"]
    else:
        api_key = st.text_input("Enter Gemini API Key", type="password")

    uploaded_file = st.file_uploader("Choose a resume file", type=["pdf", "docx", "doc", "jpg", "jpeg", "png"])

    if not (uploaded_file and api_key):
        return

    with st.spinner('Analyzing resume...'):
        result = parse_resume(uploaded_file, api_key)

    if not result:
        return

    st.subheader("Extracted Information")

    # The model is told to leave missing fields empty, so the keys normally
    # exist with "" (or null). Use `or` so the placeholder actually appears;
    # a .get() default would only apply to an absent key.

    # Display summary in a text area
    st.text_area("Summary", result.get('summary') or 'Not found', height=100)

    # Display personal information
    col1, col2, col3 = st.columns(3)
    with col1:
        st.write("**Name:**", result.get('name') or 'Not found')
    with col2:
        st.write("**Email:**", result.get('email') or 'Not found')
    with col3:
        st.write("**Phone:**", result.get('phone') or 'Not found')

    # Display total experience (in months when below one year)
    total_exp = result.get('total_years_experience') or 0
    exp_text = f"{total_exp:.1f} years" if total_exp >= 1 else f"{total_exp * 12:.0f} months"
    st.write("**Total Experience:**", exp_text)

    # Display education
    st.subheader("Education")
    if result.get('education'):
        for edu in result['education']:
            st.write(f"- {format_education(edu)}")
    else:
        st.write("No education information found")

    # Display work experience
    st.subheader("Work Experience")
    if result.get('work_experience'):
        for exp in result['work_experience']:
            duration = f" ({exp['duration']})" if exp.get('duration') else ""
            position = exp.get('position') or 'Role not found'
            company = exp.get('company') or 'Company not found'
            st.write(f"- {position} at {company}{duration}")
    else:
        st.write("No work experience found")

    # Display Skills
    st.subheader("Skills:")
    if result.get('skills'):
        for skill in result['skills']:
            st.write(f"- {skill}")
    else:
        st.write("- No skills found")

    # Display LinkedIn profile
    st.write("**LinkedIn Profile:**", result.get('linkedin') or 'Not found')


if __name__ == "__main__":
    main()