Spaces:
Runtime error
Runtime error
Caitlin Blackmore
committed on
Commit
·
8de44db
1
Parent(s):
9b3b1bc
performance optimization enhancements
Browse files
- main.py +10 -8
- match_utils.py +18 -6
- static/cohere_tSNE_dat.csv +3 -0
- templates/job_neighborhoods.html +0 -0
main.py
CHANGED
|
@@ -7,13 +7,13 @@
|
|
| 7 |
# License: MIT License
|
| 8 |
|
| 9 |
# IMPORTS
|
| 10 |
-
from fastapi import FastAPI, Request, Form, File, UploadFile
|
| 11 |
from fastapi.templating import Jinja2Templates
|
| 12 |
from fastapi.staticfiles import StaticFiles
|
| 13 |
from fastapi.responses import HTMLResponse
|
| 14 |
import pandas as pd
|
| 15 |
from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
|
| 16 |
-
from match_utils import get_resume,
|
| 17 |
|
| 18 |
# APP SETUP
|
| 19 |
app = FastAPI()
|
|
@@ -26,18 +26,19 @@ onet = pd.read_csv('static/ONET_JobTitles.csv')
|
|
| 26 |
### JOB INFORMATION CENTER ###
|
| 27 |
# GET
|
| 28 |
@app.get("/")
|
| 29 |
-
def
|
| 30 |
joblist = onet['JobTitle']
|
| 31 |
return templates.TemplateResponse('job_list.html', context={'request': request, 'joblist': joblist})
|
| 32 |
|
| 33 |
# POST
|
| 34 |
@app.post("/")
|
| 35 |
-
def
|
| 36 |
joblist = onet['JobTitle']
|
| 37 |
if jobtitle:
|
| 38 |
onetCode = get_onet_code(jobtitle)
|
| 39 |
jobdescription = get_onet_description(onetCode)
|
| 40 |
tasks = get_onet_tasks(onetCode)
|
|
|
|
| 41 |
return templates.TemplateResponse('job_list.html', context={
|
| 42 |
'request': request,
|
| 43 |
'joblist': joblist,
|
|
@@ -47,19 +48,20 @@ def render_job_info(request: Request, jobtitle: str = Form(enum=[x for x in onet
|
|
| 47 |
|
| 48 |
### JOB NEIGHBORHOODS ###
|
| 49 |
@app.get("/explore-job-neighborhoods/", response_class=HTMLResponse)
|
| 50 |
-
def
|
| 51 |
return templates.TemplateResponse('job_neighborhoods.html', context={'request': request})
|
| 52 |
|
| 53 |
### FIND-MY-MATCH ###
|
| 54 |
# GET
|
| 55 |
@app.get("/find-my-match/", response_class=HTMLResponse)
|
| 56 |
-
def
|
| 57 |
return templates.TemplateResponse('find_my_match.html', context={'request': request})
|
| 58 |
|
| 59 |
# POST
|
| 60 |
@app.post('/find-my-match/', response_class=HTMLResponse)
|
| 61 |
-
async def
|
| 62 |
resume = get_resume(resume)
|
| 63 |
-
|
|
|
|
| 64 |
skills = await skillNER(resume)
|
| 65 |
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
|
|
|
|
| 7 |
# License: MIT License
|
| 8 |
|
| 9 |
# IMPORTS
|
| 10 |
+
from fastapi import FastAPI, Request, Form, File, UploadFile, BackgroundTasks
|
| 11 |
from fastapi.templating import Jinja2Templates
|
| 12 |
from fastapi.staticfiles import StaticFiles
|
| 13 |
from fastapi.responses import HTMLResponse
|
| 14 |
import pandas as pd
|
| 15 |
from scrape_onet import get_onet_code, get_onet_description, get_onet_tasks
|
| 16 |
+
from match_utils import neighborhoods, get_resume, coSkillEmbed, sim_result_loop, skillNER
|
| 17 |
|
| 18 |
# APP SETUP
|
| 19 |
app = FastAPI()
|
|
|
|
| 26 |
### JOB INFORMATION CENTER ###
|
| 27 |
# GET
|
| 28 |
@app.get("/")
|
| 29 |
+
def get_job(request: Request):
|
| 30 |
joblist = onet['JobTitle']
|
| 31 |
return templates.TemplateResponse('job_list.html', context={'request': request, 'joblist': joblist})
|
| 32 |
|
| 33 |
# POST
|
| 34 |
@app.post("/")
|
| 35 |
+
def post_job(request: Request, bt: BackgroundTasks, jobtitle: str = Form(enum=[x for x in onet['JobTitle']])):
|
| 36 |
joblist = onet['JobTitle']
|
| 37 |
if jobtitle:
|
| 38 |
onetCode = get_onet_code(jobtitle)
|
| 39 |
jobdescription = get_onet_description(onetCode)
|
| 40 |
tasks = get_onet_tasks(onetCode)
|
| 41 |
+
bt.add_task(neighborhoods, jobtitle)
|
| 42 |
return templates.TemplateResponse('job_list.html', context={
|
| 43 |
'request': request,
|
| 44 |
'joblist': joblist,
|
|
|
|
| 48 |
|
| 49 |
### JOB NEIGHBORHOODS ###
|
| 50 |
@app.get("/explore-job-neighborhoods/", response_class=HTMLResponse)
|
| 51 |
+
async def get_job_neighborhoods(request: Request):
|
| 52 |
return templates.TemplateResponse('job_neighborhoods.html', context={'request': request})
|
| 53 |
|
| 54 |
### FIND-MY-MATCH ###
|
| 55 |
# GET
|
| 56 |
@app.get("/find-my-match/", response_class=HTMLResponse)
|
| 57 |
+
def get_matches(request: Request):
|
| 58 |
return templates.TemplateResponse('find_my_match.html', context={'request': request})
|
| 59 |
|
| 60 |
# POST
|
| 61 |
@app.post('/find-my-match/', response_class=HTMLResponse)
|
| 62 |
+
async def post_matches(request: Request, resume: UploadFile = File(...)):
|
| 63 |
resume = get_resume(resume)
|
| 64 |
+
embeds = await coSkillEmbed(resume)
|
| 65 |
+
simResults = await sim_result_loop(embeds)
|
| 66 |
skills = await skillNER(resume)
|
| 67 |
return templates.TemplateResponse('find_my_match.html', context={'request': request, 'resume': resume, 'skills': skills, 'simResults': simResults})
|
match_utils.py
CHANGED
|
@@ -12,7 +12,7 @@ import numpy as np
|
|
| 12 |
from numpy.linalg import norm
|
| 13 |
import ssl
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
-
import
|
| 16 |
|
| 17 |
# SSL CERTIFICATE FIX
|
| 18 |
try:
|
|
@@ -31,6 +31,7 @@ load_dotenv()
|
|
| 31 |
|
| 32 |
# LOAD COHERE EMBEDDINGS:
|
| 33 |
simdat = pd.read_csv('static/cohere_embeddings.csv')
|
|
|
|
| 34 |
|
| 35 |
# LOAD FINE-TUNED MODEL
|
| 36 |
# (see https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier)
|
|
@@ -39,6 +40,18 @@ tokenizer = AutoTokenizer.from_pretrained('static/tokenizer_shards', low_cpu_mem
|
|
| 39 |
classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
|
| 40 |
|
| 41 |
# UTILITY FUNCTIONS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
def get_resume(resume):
|
| 43 |
path = f"static/{resume.filename}"
|
| 44 |
with open(path, 'wb') as buffer:
|
|
@@ -50,7 +63,7 @@ def get_resume(resume):
|
|
| 50 |
resume = "\n".join(text)
|
| 51 |
return resume
|
| 52 |
|
| 53 |
-
def coSkillEmbed(text):
|
| 54 |
try:
|
| 55 |
co = cohere.Client(os.getenv("COHERE_TOKEN"))
|
| 56 |
response = co.embed(
|
|
@@ -60,10 +73,9 @@ def coSkillEmbed(text):
|
|
| 60 |
except CohereError as e:
|
| 61 |
return e
|
| 62 |
|
| 63 |
-
async def
|
| 64 |
def cosine(A, B):
|
| 65 |
return np.dot(A,B)/(norm(A)*norm(B))
|
| 66 |
-
embeds = coSkillEmbed(resume)
|
| 67 |
simResults = []
|
| 68 |
for i in range(len(simdat)):
|
| 69 |
simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:])))
|
|
@@ -103,5 +115,5 @@ async def skillNER(resume):
|
|
| 103 |
labels.append("Skill")
|
| 104 |
else:
|
| 105 |
labels.append("Not Skill")
|
| 106 |
-
|
| 107 |
-
return
|
|
|
|
| 12 |
from numpy.linalg import norm
|
| 13 |
import ssl
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
+
import plotly_express as px
|
| 16 |
|
| 17 |
# SSL CERTIFICATE FIX
|
| 18 |
try:
|
|
|
|
| 31 |
|
| 32 |
# LOAD COHERE EMBEDDINGS:
|
| 33 |
simdat = pd.read_csv('static/cohere_embeddings.csv')
|
| 34 |
+
coheredat = pd.read_csv('static/cohere_tSNE_dat.csv')
|
| 35 |
|
| 36 |
# LOAD FINE-TUNED MODEL
|
| 37 |
# (see https://huggingface.co/celise88/distilbert-base-uncased-finetuned-binary-classifier)
|
|
|
|
| 40 |
classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
|
| 41 |
|
| 42 |
# UTILITY FUNCTIONS
|
| 43 |
+
async def neighborhoods(jobtitle=None):
|
| 44 |
+
def format_title(logo, title, subtitle, title_font_size = 28, subtitle_font_size=14):
|
| 45 |
+
logo = f'<a href="/" target="_self">{logo}</a>'
|
| 46 |
+
subtitle = f'<span style="font-size: {subtitle_font_size}px;">{subtitle}</span>'
|
| 47 |
+
title = f'<span style="font-size: {title_font_size}px;">{title}</span>'
|
| 48 |
+
return f'{logo}{title}<br>{subtitle}'
|
| 49 |
+
|
| 50 |
+
fig = px.scatter(coheredat, x = 'longitude', y = 'latitude', color = 'Category', hover_data = ['Category', 'Title'],
|
| 51 |
+
title=format_title("Pathfinder", " Job Neighborhoods: Explore the Map!", "(Generated using Co-here AI's LLM & ONET's Task Statements)"))
|
| 52 |
+
fig['layout'].update(height=1000, width=1500, font=dict(family='Courier New, monospace', color='black'))
|
| 53 |
+
fig.write_html('templates/job_neighborhoods.html')
|
| 54 |
+
|
| 55 |
def get_resume(resume):
|
| 56 |
path = f"static/{resume.filename}"
|
| 57 |
with open(path, 'wb') as buffer:
|
|
|
|
| 63 |
resume = "\n".join(text)
|
| 64 |
return resume
|
| 65 |
|
| 66 |
+
async def coSkillEmbed(text):
|
| 67 |
try:
|
| 68 |
co = cohere.Client(os.getenv("COHERE_TOKEN"))
|
| 69 |
response = co.embed(
|
|
|
|
| 73 |
except CohereError as e:
|
| 74 |
return e
|
| 75 |
|
| 76 |
+
async def sim_result_loop(embeds):
|
| 77 |
def cosine(A, B):
|
| 78 |
return np.dot(A,B)/(norm(A)*norm(B))
|
|
|
|
| 79 |
simResults = []
|
| 80 |
for i in range(len(simdat)):
|
| 81 |
simResults.append(cosine(np.array(embeds), np.array(simdat.iloc[i,1:])))
|
|
|
|
| 115 |
labels.append("Skill")
|
| 116 |
else:
|
| 117 |
labels.append("Not Skill")
|
| 118 |
+
skills = dict(zip(resume, labels))
|
| 119 |
+
return skills
|
static/cohere_tSNE_dat.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac3dbbea21867638654b3c399b988ca95c5573cc602383d8835cffe36952a7cb
|
| 3 |
+
size 1858107
|
templates/job_neighborhoods.html
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|