avimittal30 committed on
Commit
a60a6e6
·
1 Parent(s): 45caa45

pushing changes for front end

Browse files
Files changed (5) hide show
  1. app.py +26 -8
  2. helper.py +92 -11
  3. index.html +0 -79
  4. parse_job_description.py +4 -0
  5. recommendation.py +4 -5
app.py CHANGED
@@ -2,28 +2,46 @@ from fastapi import FastAPI
2
  from recommendation import calculate_final_score
3
  from pydantic import BaseModel
4
  from typing import List
 
 
5
  from data import resumes_data
6
  from parse_job_description import extract_job_details
 
 
 
7
 
8
  app = FastAPI()
9
 
 
 
 
 
 
 
 
 
 
 
 
10
  class JobDescriptionRequest(BaseModel):
11
  job_description: str # Accepts job description text
12
 
13
  class ResumeResponse(BaseModel):
14
  top_resumes: List[str]
15
 
 
16
 
17
-
18
- output_json={}
19
  @app.post("/candidate_recommendation/", response_model=ResumeResponse)
20
  def get_best_resumes(request: JobDescriptionRequest):
21
  job_description = request.job_description
22
- job_details=extract_job_details(job_description)
23
- df=calculate_final_score(resumes_data(), job_details)
24
- top_resumes=df.head(5)['Resume'].tolist()
 
 
 
25
  return ResumeResponse(top_resumes=top_resumes)
26
 
27
-
28
-
29
-
 
2
  from recommendation import calculate_final_score
3
  from pydantic import BaseModel
4
  from typing import List
5
+ from fastapi.staticfiles import StaticFiles
6
+ from fastapi.responses import FileResponse
7
  from data import resumes_data
8
  from parse_job_description import extract_job_details
9
+ from datetime import datetime
10
+
11
+ from fastapi.middleware.cors import CORSMiddleware
12
 
13
  app = FastAPI()
14
 
15
+ app.add_middleware(
16
+ CORSMiddleware,
17
+ allow_origins=["*"], #
18
+ allow_credentials=True,
19
+ allow_methods=["*"], #
20
+ allow_headers=["*"], #
21
+ )
22
+
23
+ # Mount the static directory
24
+ app.mount("/static", StaticFiles(directory="static"), name="static")
25
+
26
  class JobDescriptionRequest(BaseModel):
27
  job_description: str # Accepts job description text
28
 
29
  class ResumeResponse(BaseModel):
30
  top_resumes: List[str]
31
 
32
+ # job_details={'Skills': ['Java', 'Python', 'C++', 'Document design processes', 'Development', 'Testing', 'Analytics', 'Troubleshooting', 'Rapid development cycles', 'Scripting', 'Test automation', 'Relational databases', 'ORM', 'SQL technologies', 'HTML5', 'CSS3', 'Content management systems', 'Web application development', 'Wicket', 'GWT', 'Spring MVC'], 'Experience': 6, 'Personality Traits': ['Desire to continue professional growth through training and education'], 'Education': ['B.E. Software Engineering', 'BE Software Engineering', 'Bachelors of Engineering Software Engineering', 'B.Tech Software Engineering', 'B.Tech. Software Engineering', 'BTech Software Engineering', 'Bachelor of Technology Software Engineering', 'B.E. Information Technology', 'BE Information Technology', 'Bachelors of Engineering Information Technology', 'B.Tech Information Technology', 'B.Tech. Information Technology', 'BTech Information Technology', 'Bachelor of Technology Information Technology']}
33
 
 
 
34
  @app.post("/candidate_recommendation/", response_model=ResumeResponse)
35
  def get_best_resumes(request: JobDescriptionRequest):
36
  job_description = request.job_description
37
+ job_details = extract_job_details(job_description)
38
+ start_time=datetime.now()
39
+ df = calculate_final_score(resumes_data(), job_details)
40
+ end_time=datetime.now()
41
+ print('processing time:', end_time-start_time)
42
+ top_resumes = df.head(5)['Resume'].tolist()
43
  return ResumeResponse(top_resumes=top_resumes)
44
 
45
+ @app.get("/")
46
+ async def serve_homepage():
47
+ return FileResponse("static/index.html") # Return index.html properly
helper.py CHANGED
@@ -5,10 +5,12 @@ from fuzzywuzzy import process, fuzz
5
  from parse_job_description import extract_job_details
6
  from data import resumes_data
7
  import pandas as pd
 
 
8
 
9
  def extract_experience(text):
10
  # Patterns for identifying Bachelor's and Master's degrees
11
- print('Inside Extract Experience !!!')
12
  bachelors_patterns = [
13
  'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
14
  'bachelor of engineering', 'graduation'
@@ -21,30 +23,37 @@ def extract_experience(text):
21
  all_years = re.findall(r'\b(\d{4})\b', text)
22
  all_years = sorted(map(int, all_years), reverse=True)
23
 
24
- # First, look for Bachelor's degree year
25
  for pattern in bachelors_patterns:
26
  for year in all_years:
27
  if re.search(fr'{pattern}.*?{year}', text, re.IGNORECASE):
28
  current_year = datetime.now().year
29
  return current_year - year
30
 
31
- # If no Bachelor's found, look for Master's degree year
32
  for pattern in masters_patterns:
33
  for year in all_years:
34
  if re.search(fr'{pattern}.*?{year}', text, re.IGNORECASE):
35
  current_year = datetime.now().year
36
  return current_year - year
37
 
38
- # Fallback to most recent year if no specific pattern found
39
  if all_years:
40
  current_year = datetime.now().year
41
  return current_year - all_years[0]
42
 
43
  return 0
44
 
 
 
 
 
 
 
 
 
 
45
  def extract_skills(text, job_details):
46
  job_skills=job_details['Skills']
47
- print('Inside Extract Skills !!!')
48
  found_skills=[]
49
  for skill in job_skills:
50
  best_match = process.extractOne(skill.lower(), text.lower().split())
@@ -53,8 +62,16 @@ def extract_skills(text, job_details):
53
  return found_skills
54
 
55
 
 
 
 
 
 
 
 
 
56
  def extract_education(text, job_details):
57
- print('Inside Extract Education!!!')
58
  education_patterns = job_details['Education']
59
  max_ratio=0
60
  for degree in education_patterns:
@@ -66,9 +83,18 @@ def extract_education(text, job_details):
66
  return max_ratio
67
 
68
 
 
 
 
 
 
 
 
 
 
69
  def match_personality_traits(resume_traits,job_details, threshold=70):
70
 
71
- print('Inside Match Personality!!!')
72
  """
73
  Matches personality traits from a job description with those in a candidate's resume using fuzzy matching.
74
 
@@ -95,8 +121,15 @@ def match_personality_traits(resume_traits,job_details, threshold=70):
95
  return matches
96
 
97
 
 
 
 
 
 
 
 
98
  def scoring(resume_text, job_description):
99
- print('inside scoring ..............')
100
  # Extract all required information from the resume text and compare with job_description
101
  matched_skills = len(extract_skills(resume_text, job_description)) # Pass job_description here
102
  traits = match_personality_traits(resume_text, job_description) # Pass job_description here
@@ -118,11 +151,19 @@ def scoring(resume_text, job_description):
118
  }
119
 
120
 
 
 
 
 
 
 
 
 
121
 
122
  def get_scores_optimized(df, job_description):
123
- print('inside scores optimized..............')
124
  # Calculate all scores in a single apply operation
125
- print('Lets see how much time it takes now !!!!')
126
 
127
  results = df['Resume'].apply(lambda x: scoring(x, job_description))
128
 
@@ -131,4 +172,44 @@ def get_scores_optimized(df, job_description):
131
  scores_df = pd.DataFrame(results.tolist(), index=df.index)
132
 
133
  # Return the original dataframe with the new columns
134
- return pd.concat([df, scores_df], axis=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from parse_job_description import extract_job_details
6
  from data import resumes_data
7
  import pandas as pd
8
+ import multiprocessing as mp
9
+ from functools import partial
10
 
11
  def extract_experience(text):
12
  # Patterns for identifying Bachelor's and Master's degrees
13
+ # print('Inside Extract Experience !!!')
14
  bachelors_patterns = [
15
  'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
16
  'bachelor of engineering', 'graduation'
 
23
  all_years = re.findall(r'\b(\d{4})\b', text)
24
  all_years = sorted(map(int, all_years), reverse=True)
25
 
26
+ # look for Bachelor's degree year
27
  for pattern in bachelors_patterns:
28
  for year in all_years:
29
  if re.search(fr'{pattern}.*?{year}', text, re.IGNORECASE):
30
  current_year = datetime.now().year
31
  return current_year - year
32
 
 
33
  for pattern in masters_patterns:
34
  for year in all_years:
35
  if re.search(fr'{pattern}.*?{year}', text, re.IGNORECASE):
36
  current_year = datetime.now().year
37
  return current_year - year
38
 
 
39
  if all_years:
40
  current_year = datetime.now().year
41
  return current_year - all_years[0]
42
 
43
  return 0
44
 
45
+ # current_time=datetime.now()
46
+ # df=resumes_data()
47
+ # exp=extract_experience(df['Resume'][10])
48
+ # print (exp)
49
+ # end_time=datetime.now()
50
+ # print('total time:', end_time-current_time)
51
+
52
+
53
+
54
  def extract_skills(text, job_details):
55
  job_skills=job_details['Skills']
56
+ # print('Inside Extract Skills !!!')
57
  found_skills=[]
58
  for skill in job_skills:
59
  best_match = process.extractOne(skill.lower(), text.lower().split())
 
62
  return found_skills
63
 
64
 
65
+ # current_time=datetime.now()
66
+ # df=resumes_data()
67
+ # exp=extract_skills(df['Resume'][10], job_details)
68
+ # print (exp)
69
+ # end_time=datetime.now()
70
+ # print('total time:', end_time-current_time)
71
+
72
+
73
  def extract_education(text, job_details):
74
+ # print('Inside Extract Education!!!')
75
  education_patterns = job_details['Education']
76
  max_ratio=0
77
  for degree in education_patterns:
 
83
  return max_ratio
84
 
85
 
86
+ # current_time=datetime.now()
87
+ # df=resumes_data()
88
+ # exp=extract_education(df['Resume'][10], job_details)
89
+ # print (exp)
90
+ # end_time=datetime.now()
91
+ # print('total time:', end_time-current_time)
92
+
93
+
94
+
95
  def match_personality_traits(resume_traits,job_details, threshold=70):
96
 
97
+ # print('Inside Match Personality!!!')
98
  """
99
  Matches personality traits from a job description with those in a candidate's resume using fuzzy matching.
100
 
 
121
  return matches
122
 
123
 
124
+ # current_time=datetime.now()
125
+ # df=resumes_data()
126
+ # exp=match_personality_traits(df['Resume'][10], job_details)
127
+ # print (exp)
128
+ # end_time=datetime.now()
129
+ # print('total time:', end_time-current_time)
130
+
131
  def scoring(resume_text, job_description):
132
+ # print('inside scoring ..............')
133
  # Extract all required information from the resume text and compare with job_description
134
  matched_skills = len(extract_skills(resume_text, job_description)) # Pass job_description here
135
  traits = match_personality_traits(resume_text, job_description) # Pass job_description here
 
151
  }
152
 
153
 
154
+ # current_time=datetime.now()
155
+ # df=resumes_data()
156
+ # exp=scoring(df['Resume'][10], job_details)
157
+ # print (exp)
158
+ # end_time=datetime.now()
159
+ # print('total time:', end_time-current_time)
160
+
161
+
162
 
163
  def get_scores_optimized(df, job_description):
164
+
165
  # Calculate all scores in a single apply operation
166
+ # print('Lets see how much time it takes now !!!!')
167
 
168
  results = df['Resume'].apply(lambda x: scoring(x, job_description))
169
 
 
172
  scores_df = pd.DataFrame(results.tolist(), index=df.index)
173
 
174
  # Return the original dataframe with the new columns
175
+ return pd.concat([df, scores_df], axis=1)
176
+
177
+
178
+ import pandas as pd
179
+ import multiprocessing as mp
180
+ from functools import partial
181
+ import time
182
+
183
+ # First, ensure all the helper functions are defined at the module level
184
+ # These are the functions called by scoring(): extract_skills, match_personality_traits,
185
+ # extract_experience, and extract_education
186
+
187
+ def get_scores_optimized(df, job_description):
188
+ print('inside scores optimized..............')
189
+ start_time = time.time()
190
+
191
+ # Method 1: Use chunking with the original apply method
192
+ chunk_size = 32
193
+ results = []
194
+
195
+ for i in range(0, len(df), chunk_size):
196
+ chunk = df.iloc[i:i+chunk_size]
197
+ chunk_results = chunk['Resume'].apply(lambda x: scoring(x, job_description))
198
+ results.extend(chunk_results.tolist())
199
+
200
+ # Convert the list of dictionaries into a DataFrame and join with original
201
+ scores_df = pd.DataFrame(results, index=df.index)
202
+
203
+ end_time = time.time()
204
+ print(f"Processing took {end_time - start_time:.2f} seconds")
205
+
206
+ # Return the original dataframe with the new columns
207
+ return pd.concat([df, scores_df], axis=1)
208
+
209
+
210
+ # current_time=datetime.now()
211
+ # df=resumes_data()
212
+ # exp=get_scores_optimized(df, job_details)
213
+ # print (exp)
214
+ # end_time=datetime.now()
215
+ # print('total time:', end_time-current_time)
index.html DELETED
@@ -1,79 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Job Matcher</title>
7
- <style>
8
- body {
9
- font-family: Arial, sans-serif;
10
- max-width: 600px;
11
- margin: 40px auto;
12
- padding: 20px;
13
- background-color: #f9f9f9;
14
- text-align: center;
15
- }
16
- textarea {
17
- width: 100%;
18
- height: 150px;
19
- padding: 10px;
20
- border: 1px solid #ccc;
21
- border-radius: 5px;
22
- }
23
- button {
24
- margin-top: 10px;
25
- padding: 10px 20px;
26
- background-color: #007BFF;
27
- color: white;
28
- border: none;
29
- border-radius: 5px;
30
- cursor: pointer;
31
- }
32
- button:hover {
33
- background-color: #0056b3;
34
- }
35
- .resume {
36
- background: white;
37
- padding: 10px;
38
- border-radius: 5px;
39
- margin-top: 10px;
40
- box-shadow: 0 0 5px rgba(0, 0, 0, 0.1);
41
- }
42
- </style>
43
- </head>
44
- <body>
45
- <h1>Job Description Input</h1>
46
- <textarea id="jobDescription" placeholder="Enter the job description..."></textarea>
47
- <button onclick="submitJobDescription()">Submit</button>
48
- <div id="resumes"></div>
49
-
50
- <script>
51
- async function submitJobDescription() {
52
- const jobDescription = document.getElementById("jobDescription").value;
53
- const resumesDiv = document.getElementById("resumes");
54
- resumesDiv.innerHTML = "Loading...";
55
-
56
- try {
57
- const response = await fetch("http://localhost:8000/match_resumes", {
58
- method: "POST",
59
- headers: { "Content-Type": "application/json" },
60
- body: JSON.stringify({ job_description: jobDescription })
61
- });
62
-
63
- if (!response.ok) throw new Error("Failed to fetch resumes");
64
- const data = await response.json();
65
-
66
- resumesDiv.innerHTML = "<h2>Top 5 Resumes</h2>";
67
- data.resumes.forEach(resume => {
68
- const div = document.createElement("div");
69
- div.className = "resume";
70
- div.textContent = resume;
71
- resumesDiv.appendChild(div);
72
- });
73
- } catch (error) {
74
- resumesDiv.innerHTML = `<p style='color: red;'>${error.message}</p>`;
75
- }
76
- }
77
- </script>
78
- </body>
79
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
parse_job_description.py CHANGED
@@ -6,8 +6,12 @@ import re
6
  import json
7
 
8
  load_dotenv()
 
 
9
  os.environ['OPENAI_API_KEY']=os.getenv('OPENAI_API_KEY')
10
 
 
 
11
 
12
  job_description = """
13
  Five or more years of experience as engineer of software and networking platforms.
 
6
  import json
7
 
8
  load_dotenv()
9
+
10
+ ## For hugging face
11
  os.environ['OPENAI_API_KEY']=os.getenv('OPENAI_API_KEY')
12
 
13
+ ## for local run
14
+ os.environ['OPENAI_API_KEY']=os.getenv('OPENAI_API_KEY')
15
 
16
  job_description = """
17
  Five or more years of experience as engineer of software and networking platforms.
recommendation.py CHANGED
@@ -3,15 +3,13 @@ from helper import get_scores_optimized
3
 
4
 
5
  def calculate_final_score(df, job_details):
6
- # Select relevant columns
7
- print('Inside final score')
8
  columns_to_normalize = ['matched_skills', 'experience', 'education_relevance', 'trait_flag']
9
  df_scored=get_scores_optimized(df,job_details)
10
- # Apply Min-Max Normalization
11
  scaler = MinMaxScaler()
12
- print(f'lets see the columns: {df_scored.columns}')
13
  df_scored[columns_to_normalize] = scaler.fit_transform(df_scored[columns_to_normalize])
14
- print(f'scoring done !!!')
15
 
16
  # Define weights
17
  weights = {
@@ -32,3 +30,4 @@ def calculate_final_score(df, job_details):
32
  df_no_full_duplicates = df_sorted.drop_duplicates(keep="first")
33
  return df_no_full_duplicates
34
 
 
 
3
 
4
 
5
  def calculate_final_score(df, job_details):
6
+ # print('Inside final score')
 
7
  columns_to_normalize = ['matched_skills', 'experience', 'education_relevance', 'trait_flag']
8
  df_scored=get_scores_optimized(df,job_details)
 
9
  scaler = MinMaxScaler()
10
+ # print(f'lets see the columns: {df_scored.columns}')
11
  df_scored[columns_to_normalize] = scaler.fit_transform(df_scored[columns_to_normalize])
12
+ # print(f'scoring done !!!')
13
 
14
  # Define weights
15
  weights = {
 
30
  df_no_full_duplicates = df_sorted.drop_duplicates(keep="first")
31
  return df_no_full_duplicates
32
 
33
+