Spaces:
Sleeping
Sleeping
File size: 2,017 Bytes
import pathlib
import ast
import os
from pypdf import PdfReader
import docx2txt
from sqlalchemy import select
from sqlalchemy.orm import Session
import openai
import cohere
from models import Job, ENGINE
# Configure the OpenAI SDK from the environment at import time; indexing
# os.environ raises KeyError if the variable is not set.
# NOTE(review): the variable is named "OPEN_API_KEY" (not "OPENAI_API_KEY") —
# confirm this matches the deployment configuration.
openai.api_key = os.environ["OPEN_API_KEY"]
# Module-level Cohere client, used by rerank() below.
co = cohere.Client(os.environ["COHERE_API_KEY"])
def gpt(user_query):
    """Send ``user_query`` to the OpenAI completion API and return the reply text.

    Args:
        user_query: Prompt string forwarded unchanged as the completion prompt.

    Returns:
        The ``text`` field of the first choice in the API response.
    """
    # Request settings: a single completion, at most 1024 tokens, no stop sequence.
    request = {
        "engine": "text-davinci-003",
        "prompt": user_query,
        "max_tokens": 1024,
        "n": 1,
        "stop": None,
        "temperature": 0.5,
    }
    completion = openai.Completion.create(**request)
    first_choice = completion["choices"][0]
    return first_choice["text"]
def parse_pdf(file_name, max_pages=1):
    """Extract text from the leading pages of a PDF.

    Args:
        file_name: Path of the PDF, passed straight to ``PdfReader``.
        max_pages: Number of leading pages to read. Defaults to 1, which keeps
            the previous first-page-only behaviour; pass ``None`` to read
            every page of the document.

    Returns:
        The extracted page texts joined with newlines. An empty string is
        returned when the document has no pages (previously an ``IndexError``).
    """
    reader = PdfReader(file_name)
    page_texts = []
    for index, page in enumerate(reader.pages):
        # Stop once the requested number of pages has been collected.
        if max_pages is not None and index >= max_pages:
            break
        # Guard against a page that yields no text so the join cannot fail.
        page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)
def parse_docx(file_name):
    """Return the text that ``docx2txt`` extracts from ``file_name``."""
    return docx2txt.process(file_name)
# def get_dict(resume_text):
# resume_dict = ast.literal_eval(gpt(f"""parse the resume and convert it into a Python string with the headings as "experience," "skills," "certifications," and "education".
# resume: "{resume_text}"
# resume_dict: """).strip())
# return resume_dict
def parse(filename):
    """Extract the text of a resume file.

    Files whose extension is ``.pdf`` (compared case-insensitively, so
    ``.PDF`` is accepted too) go through ``parse_pdf``; every other file is
    handed to ``parse_docx``.

    Args:
        filename: Location of the resume file; converted with ``pathlib.Path``.

    Returns:
        The extracted text.
    """
    resume_file = pathlib.Path(filename)
    # Compare the lowered suffix so "CV.PDF" is not mistaken for a Word file.
    if resume_file.suffix.lower() == ".pdf":
        text = parse_pdf(resume_file)
    else:
        text = parse_docx(resume_file)
    # Debug trace: writes the full extracted text to stdout.
    print("parse"+"~"*10,text)
    # dct = get_dict(text)
    # print(dct)
    return text
def rerank(job_id, docs, top_n):
    """Rank candidate documents against the post name of a stored job.

    Args:
        job_id: Value matched against ``Job.job_id``; the lookup uses
            ``.one()``, so exactly one row is expected to match.
        docs: Documents forwarded to the Cohere rerank endpoint.
        top_n: Number of results requested from the rerank endpoint.

    Returns:
        The response object from ``co.rerank`` (it is also printed to stdout).
    """
    job_query = select(Job).where(Job.job_id == job_id)
    with Session(ENGINE) as db:
        # Only the post name is needed from the Job row.
        role = db.scalars(job_query).one().post_name
    ranking = co.rerank(
        model="rerank-english-v2.0",
        query=f"Which profile suits most for the role of {role}?",
        documents=docs,
        top_n=top_n,
    )
    print(ranking)
    return ranking