Spaces:

avimittal30
/

candidate-recommender

Sleeping

App Files Files Community

avimittal30 committed on Mar 30, 2025

Commit

a60a6e6

1 Parent(s): 45caa45

pushing changes for front end

Browse files

Files changed (5) hide show

app.py +26 -8
helper.py +92 -11
index.html +0 -79
parse_job_description.py +4 -0
recommendation.py +4 -5

app.py CHANGED Viewed

@@ -2,28 +2,46 @@ from fastapi import FastAPI
 from recommendation import calculate_final_score
 from pydantic import BaseModel
 from typing import List
 from data import resumes_data
 from parse_job_description import extract_job_details
 app = FastAPI()
 class JobDescriptionRequest(BaseModel):
     job_description: str  # Accepts job description text
 class ResumeResponse(BaseModel):
     top_resumes: List[str]
-output_json={}
 @app.post("/candidate_recommendation/", response_model=ResumeResponse)
 def get_best_resumes(request: JobDescriptionRequest):
     job_description = request.job_description
-    job_details=extract_job_details(job_description)
-    df=calculate_final_score(resumes_data(), job_details)
-    top_resumes=df.head(5)['Resume'].tolist()
     return ResumeResponse(top_resumes=top_resumes)

 from recommendation import calculate_final_score
 from pydantic import BaseModel
 from typing import List
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
 from data import resumes_data
 from parse_job_description import extract_job_details
+from datetime import datetime
+from fastapi.middleware.cors import CORSMiddleware
 app = FastAPI()
+# Open CORS policy: any origin may call the API with any method and headers.
+# Credentials stay disabled because credentialed CORS requests cannot be
+# combined with wildcard origins/methods/headers; list explicit origins here
+# before switching allow_credentials back on.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Mount the static directory
+app.mount("/static", StaticFiles(directory="static"), name="static")
 # Request body accepted by POST /candidate_recommendation/.
 class JobDescriptionRequest(BaseModel):
     job_description: str  # Accepts job description text
 # Response body returned by POST /candidate_recommendation/: the selected
 # resume texts, one string per resume.
 class ResumeResponse(BaseModel):
     top_resumes: List[str]
+# job_details={'Skills': ['Java', 'Python', 'C++', 'Document design processes', 'Development', 'Testing', 'Analytics', 'Troubleshooting', 'Rapid development cycles', 'Scripting', 'Test automation', 'Relational databases', 'ORM', 'SQL technologies', 'HTML5', 'CSS3', 'Content management systems', 'Web application development', 'Wicket', 'GWT', 'Spring MVC'], 'Experience': 6, 'Personality Traits': ['Desire to continue professional growth through training and education'], 'Education': ['B.E. Software Engineering', 'BE Software Engineering', 'Bachelors of Engineering Software Engineering', 'B.Tech Software Engineering', 'B.Tech. Software Engineering', 'BTech Software Engineering', 'Bachelor of Technology Software Engineering', 'B.E. Information Technology', 'BE Information Technology', 'Bachelors of Engineering Information Technology', 'B.Tech Information Technology', 'B.Tech. Information Technology', 'BTech Information Technology', 'Bachelor of Technology Information Technology']}
 # Score the stored resumes against the posted job description and return the
 # text of the first five rows of the scored table.
 @app.post("/candidate_recommendation/", response_model=ResumeResponse)
 def get_best_resumes(request: JobDescriptionRequest):
     job_description = request.job_description
     # Turn the free-text description into the job details used for scoring.
+    job_details = extract_job_details(job_description)
     # Only the scoring step is timed; the description parsing above is excluded.
+    start_time=datetime.now()
+    df = calculate_final_score(resumes_data(), job_details)
+    end_time=datetime.now()
+    print('processing time:', end_time-start_time)
     # head(5) assumes calculate_final_score returns rows ordered best-first - TODO confirm.
+    top_resumes = df.head(5)['Resume'].tolist()
     return ResumeResponse(top_resumes=top_resumes)
+@app.get("/")
+async def serve_homepage():
+    # Answer requests for the site root with the front-end page.
+    homepage = FileResponse("static/index.html")
+    return homepage

helper.py CHANGED Viewed

@@ -5,10 +5,12 @@ from fuzzywuzzy import process, fuzz
 from parse_job_description import extract_job_details
 from data import resumes_data
 import pandas as pd
 def extract_experience(text):
     # Patterns for identifying Bachelor's and Master's degrees
-    print('Inside Extract Experience !!!')
     bachelors_patterns = [
         'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
         'bachelor of engineering', 'graduation'
@@ -21,30 +23,37 @@ def extract_experience(text):
     all_years = re.findall(r'\b(\d{4})\b', text)
     all_years = sorted(map(int, all_years), reverse=True)
-    # First, look for Bachelor's degree year
     for pattern in bachelors_patterns:
         for year in all_years:
             if re.search(fr'{pattern}.*?{year}', text, re.IGNORECASE):
                 current_year = datetime.now().year
                 return current_year - year
-    # If no Bachelor's found, look for Master's degree year
     for pattern in masters_patterns:
         for year in all_years:
             if re.search(fr'{pattern}.*?{year}', text, re.IGNORECASE):
                 current_year = datetime.now().year
                 return current_year - year
-    # Fallback to most recent year if no specific pattern found
     if all_years:
         current_year = datetime.now().year
         return current_year - all_years[0]
     return 0
 def extract_skills(text, job_details):
   job_skills=job_details['Skills']
-  print('Inside Extract Skills !!!')
   found_skills=[]
   for skill in job_skills:
     best_match = process.extractOne(skill.lower(), text.lower().split())
@@ -53,8 +62,16 @@ def extract_skills(text, job_details):
   return found_skills
 def extract_education(text, job_details):
-    print('Inside Extract Education!!!')
     education_patterns = job_details['Education']
     max_ratio=0
     for degree in education_patterns:
@@ -66,9 +83,18 @@ def extract_education(text, job_details):
     return max_ratio
 def match_personality_traits(resume_traits,job_details, threshold=70):
-    print('Inside Match Personality!!!')
     """
     Matches personality traits from a job description with those in a candidate's resume using fuzzy matching.
@@ -95,8 +121,15 @@ def match_personality_traits(resume_traits,job_details, threshold=70):
     return matches
 def scoring(resume_text, job_description):
-    print('inside  scoring ..............')
     # Extract all required information from the resume text and compare with job_description
     matched_skills = len(extract_skills(resume_text, job_description))  # Pass job_description here
     traits = match_personality_traits(resume_text, job_description)  # Pass job_description here
@@ -118,11 +151,19 @@ def scoring(resume_text, job_description):
     }
 def get_scores_optimized(df, job_description):
-    print('inside  scores optimized..............')
     # Calculate all scores in a single apply operation
-    print('Lets see how much time it takes now !!!!')
     results = df['Resume'].apply(lambda x: scoring(x, job_description))
@@ -131,4 +172,44 @@ def get_scores_optimized(df, job_description):
     scores_df = pd.DataFrame(results.tolist(), index=df.index)
     # Return the original dataframe with the new columns
-    return pd.concat([df, scores_df], axis=1)

 from parse_job_description import extract_job_details
 from data import resumes_data
 import pandas as pd
+import multiprocessing as mp
+from functools import partial
 def extract_experience(text):
     # Patterns for identifying Bachelor's and Master's degrees
+    # print('Inside Extract Experience !!!')
     bachelors_patterns = [
         'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
         'bachelor of engineering', 'graduation'
     all_years = re.findall(r'\b(\d{4})\b', text)
     all_years = sorted(map(int, all_years), reverse=True)
+    # look for Bachelor's degree year
     for pattern in bachelors_patterns:
         for year in all_years:
             if re.search(fr'{pattern}.*?{year}', text, re.IGNORECASE):
                 current_year = datetime.now().year
                 return current_year - year
     for pattern in masters_patterns:
         for year in all_years:
             if re.search(fr'{pattern}.*?{year}', text, re.IGNORECASE):
                 current_year = datetime.now().year
                 return current_year - year
     if all_years:
         current_year = datetime.now().year
         return current_year - all_years[0]
     return 0
+# current_time=datetime.now()
+# df=resumes_data()
+# exp=extract_experience(df['Resume'][10])
+# print (exp)
+# end_time=datetime.now()
+# print('total time:', end_time-current_time)
 def extract_skills(text, job_details):
   job_skills=job_details['Skills']
+#   print('Inside Extract Skills !!!')
   found_skills=[]
   for skill in job_skills:
     best_match = process.extractOne(skill.lower(), text.lower().split())
   return found_skills
+# current_time=datetime.now()
+# df=resumes_data()
+# exp=extract_skills(df['Resume'][10], job_details)
+# print (exp)
+# end_time=datetime.now()
+# print('total time:', end_time-current_time)
 def extract_education(text, job_details):
+    # print('Inside Extract Education!!!')
     education_patterns = job_details['Education']
     max_ratio=0
     for degree in education_patterns:
     return max_ratio
+# current_time=datetime.now()
+# df=resumes_data()
+# exp=extract_education(df['Resume'][10], job_details)
+# print (exp)
+# end_time=datetime.now()
+# print('total time:', end_time-current_time)
 def match_personality_traits(resume_traits,job_details, threshold=70):
+    # print('Inside Match Personality!!!')
     """
     Matches personality traits from a job description with those in a candidate's resume using fuzzy matching.
     return matches
+# current_time=datetime.now()
+# df=resumes_data()
+# exp=match_personality_traits(df['Resume'][10], job_details)
+# print (exp)
+# end_time=datetime.now()
+# print('total time:', end_time-current_time)
 def scoring(resume_text, job_description):
+    # print('inside  scoring ..............')
     # Extract all required information from the resume text and compare with job_description
     matched_skills = len(extract_skills(resume_text, job_description))  # Pass job_description here
     traits = match_personality_traits(resume_text, job_description)  # Pass job_description here
     }
+# current_time=datetime.now()
+# df=resumes_data()
+# exp=scoring(df['Resume'][10], job_details)
+# print (exp)
+# end_time=datetime.now()
+# print('total time:', end_time-current_time)
 def get_scores_optimized(df, job_description):
     # Calculate all scores in a single apply operation
+    # print('Lets see how much time it takes now !!!!')
     results = df['Resume'].apply(lambda x: scoring(x, job_description))
     scores_df = pd.DataFrame(results.tolist(), index=df.index)
     # Return the original dataframe with the new columns
+    return pd.concat([df, scores_df], axis=1)
+import pandas as pd
+import multiprocessing as mp
+from functools import partial
+import time
+# First, ensure all the helper functions are defined at the module level
+# These are the functions called by scoring(): extract_skills, match_personality_traits,
+# extract_experience, and extract_education
+def get_scores_optimized(df, job_description):
+    """Score every resume in ``df`` against the job description.
+
+    Args:
+        df: DataFrame with a ``Resume`` column holding the resume text.
+        job_description: Job details, forwarded unchanged to ``scoring``.
+
+    Returns:
+        A new DataFrame made of ``df`` plus the columns built from the
+        per-resume results of ``scoring``, aligned on ``df``'s index.
+    """
+    print('inside scores optimized..............')
+    # perf_counter is monotonic, so the measured duration cannot be skewed
+    # by system clock adjustments the way time.time() differences can.
+    start_time = time.perf_counter()
+    # One pass over the column: splitting it into fixed-size chunks only
+    # re-ran the same element-wise apply and concatenated the pieces.
+    results = df['Resume'].apply(lambda resume: scoring(resume, job_description))
+    # Expand the list of per-resume results into columns on the original index.
+    scores_df = pd.DataFrame(results.tolist(), index=df.index)
+    elapsed = time.perf_counter() - start_time
+    print(f"Processing took {elapsed:.2f} seconds")
+    # Return the original dataframe with the new columns
+    return pd.concat([df, scores_df], axis=1)
+# current_time=datetime.now()
+# df=resumes_data()
+# exp=get_scores_optimized(df, job_details)
+# print (exp)
+# end_time=datetime.now()
+# print('total time:', end_time-current_time)

index.html DELETED Viewed

@@ -1,79 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Job Matcher</title>
-    <style>
-        body {
-            font-family: Arial, sans-serif;
-            max-width: 600px;
-            margin: 40px auto;
-            padding: 20px;
-            background-color: #f9f9f9;
-            text-align: center;
-        }
-        textarea {
-            width: 100%;
-            height: 150px;
-            padding: 10px;
-            border: 1px solid #ccc;
-            border-radius: 5px;
-        }
-        button {
-            margin-top: 10px;
-            padding: 10px 20px;
-            background-color: #007BFF;
-            color: white;
-            border: none;
-            border-radius: 5px;
-            cursor: pointer;
-        }
-        button:hover {
-            background-color: #0056b3;
-        }
-        .resume {
-            background: white;
-            padding: 10px;
-            border-radius: 5px;
-            margin-top: 10px;
-            box-shadow: 0 0 5px rgba(0, 0, 0, 0.1);
-        }
-    </style>
-</head>
-<body>
-    <h1>Job Description Input</h1>
-    <textarea id="jobDescription" placeholder="Enter the job description..."></textarea>
-    <button onclick="submitJobDescription()">Submit</button>
-    <div id="resumes"></div>
-    <script>
-        async function submitJobDescription() {
-            const jobDescription = document.getElementById("jobDescription").value;
-            const resumesDiv = document.getElementById("resumes");
-            resumesDiv.innerHTML = "Loading...";
-            try {
-                const response = await fetch("http://localhost:8000/match_resumes", {
-                    method: "POST",
-                    headers: { "Content-Type": "application/json" },
-                    body: JSON.stringify({ job_description: jobDescription })
-                });
-                if (!response.ok) throw new Error("Failed to fetch resumes");
-                const data = await response.json();
-                resumesDiv.innerHTML = "<h2>Top 5 Resumes</h2>";
-                data.resumes.forEach(resume => {
-                    const div = document.createElement("div");
-                    div.className = "resume";
-                    div.textContent = resume;
-                    resumesDiv.appendChild(div);
-                });
-            } catch (error) {
-                resumesDiv.innerHTML = `<p style='color: red;'>${error.message}</p>`;
-            }
-        }
-    </script>
-</body>
-</html>

parse_job_description.py CHANGED Viewed

@@ -6,8 +6,12 @@ import re
 import json
 load_dotenv()
 os.environ['OPENAI_API_KEY']=os.getenv('OPENAI_API_KEY')
 job_description = """
 Five or more years of experience as engineer of software and networking platforms.

 import json
 load_dotenv()
+## For Hugging Face: re-export the key found in the process environment
 os.environ['OPENAI_API_KEY']=os.getenv('OPENAI_API_KEY')
+## For local run: this statement is identical to the assignment above
 ## NOTE(review): os.getenv() returns None when the variable is unset and
 ## os.environ only accepts str values, so either assignment raises TypeError then.
+os.environ['OPENAI_API_KEY']=os.getenv('OPENAI_API_KEY')
 job_description = """
 Five or more years of experience as engineer of software and networking platforms.

recommendation.py CHANGED Viewed

@@ -3,15 +3,13 @@ from helper import get_scores_optimized
 def calculate_final_score(df, job_details):
-    # Select relevant columns
-    print('Inside final score')
     columns_to_normalize = ['matched_skills', 'experience', 'education_relevance', 'trait_flag']
     df_scored=get_scores_optimized(df,job_details)
-    # Apply Min-Max Normalization
     scaler = MinMaxScaler()
-    print(f'lets see the columns: {df_scored.columns}')
     df_scored[columns_to_normalize] = scaler.fit_transform(df_scored[columns_to_normalize])
-    print(f'scoring done !!!')
     # Define weights
     weights = {
@@ -32,3 +30,4 @@ def calculate_final_score(df, job_details):
     df_no_full_duplicates = df_sorted.drop_duplicates(keep="first")
     return df_no_full_duplicates

 def calculate_final_score(df, job_details):
+    # print('Inside final score')
     columns_to_normalize = ['matched_skills', 'experience', 'education_relevance', 'trait_flag']
     df_scored=get_scores_optimized(df,job_details)
     scaler = MinMaxScaler()
+    # print(f'lets see the columns: {df_scored.columns}')
     df_scored[columns_to_normalize] = scaler.fit_transform(df_scored[columns_to_normalize])
+    # print(f'scoring done !!!')
     # Define weights
     weights = {
     df_no_full_duplicates = df_sorted.drop_duplicates(keep="first")
     return df_no_full_duplicates