Jayavathsan committed on
Commit
df18f05
·
1 Parent(s): 30af218

Upload 11 files

Browse files
.env.example ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ OPENAI_API_KEY=""
2
+ HUGGINGFACEHUB_API_TOKEN=""
Docs/embedded-software-engineer-resume-example.pdf ADDED
Binary file (32.9 kB). View file
 
Docs/java-programmer-resume-example.pdf ADDED
Binary file (25.4 kB). View file
 
Docs/principal-software-engineer-resume-example.pdf ADDED
Binary file (49 kB). View file
 
Docs/python-developer-resume-example.pdf ADDED
Binary file (33.7 kB). View file
 
Docs/security-engineer-resume-example.pdf ADDED
Binary file (31.3 kB). View file
 
Docs/senior-programmer-resume-example.pdf ADDED
Binary file (31.5 kB). View file
 
Docs/software-engineer-iii-front-end-resume-example.pdf ADDED
Binary file (46.3 kB). View file
 
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from utils import *
4
+ import uuid
5
+
6
#Creating session variables
if 'unique_id' not in st.session_state:
    st.session_state['unique_id'] = ''


def main():
    """Streamlit entry point for the HR resume-screening assistant.

    Collects a job description and PDF resumes, embeds and pushes them to
    Pinecone, retrieves the most relevant resumes for the job description,
    and displays an LLM-generated summary for each match.
    """
    import os  # stdlib; used to read Pinecone credentials from the environment

    load_dotenv()

    st.set_page_config(page_title="Resume Screening Assistance")
    st.title("HR - Resume Screening Assistance...💁 ")
    st.subheader("I can help you in resume screening process")

    job_description = st.text_area("Please paste the 'JOB DESCRIPTION' here...", key="1")
    document_count = st.text_input("No.of 'RESUMES' to return", key="2")
    # Upload the resumes (pdf files)
    pdf = st.file_uploader("Upload resumes here, only PDF files allowed", type=["pdf"], accept_multiple_files=True)

    submit = st.button("Help me with the analysis")

    if submit:
        with st.spinner('Wait for it...'):

            # SECURITY FIX: the Pinecone API key was previously hard-coded
            # in source (and committed). Credentials now come from the
            # environment (.env via load_dotenv()); the environment/index
            # defaults preserve the original values.
            pinecone_apikey = os.getenv("PINECONE_API_KEY", "")
            pinecone_env = os.getenv("PINECONE_ENVIRONMENT", "us-west1-gcp-free")
            pinecone_index = os.getenv("PINECONE_INDEX_NAME", "test")

            # Creating a unique ID, so that we can use to query and get only
            # the user uploaded documents from the Pinecone vector store
            st.session_state['unique_id'] = uuid.uuid4().hex

            # Create a documents list out of all the user uploaded pdf files
            final_docs_list = create_docs(pdf, st.session_state['unique_id'])

            # Displaying the count of resumes that have been uploaded
            st.write("*Resumes uploaded* :" + str(len(final_docs_list)))

            # Create embeddings instance
            embeddings = create_embeddings_load_data()

            # Push data to Pinecone
            push_to_pinecone(pinecone_apikey, pinecone_env, pinecone_index, embeddings, final_docs_list)

            # Fetch relevant documents from Pinecone
            relavant_docs = similar_docs(job_description, document_count, pinecone_apikey, pinecone_env, pinecone_index, embeddings, st.session_state['unique_id'])

            # Introducing a line separator
            st.write(":heavy_minus_sign:" * 30)

            # For each item in relevant docs - display some info of it on the UI
            for item in range(len(relavant_docs)):

                st.subheader("👉 " + str(item + 1))

                # Displaying file name (stored in the document metadata)
                st.write("**File** : " + relavant_docs[item][0].metadata['name'])

                # Introducing Expander feature
                with st.expander('Show me 👀'):
                    st.info("**Match Score** : " + str(relavant_docs[item][1]))

                    # Summary of the current item via 'get_summary' (LLM + LangChain chain)
                    summary = get_summary(relavant_docs[item][0])
                    st.write("**Summary** : " + summary)

        st.success("Hope I was able to save your time❤️")


#Invoking main function
if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ streamlit
3
+ openai
4
+ tiktoken
5
+ python-dotenv
6
+ unstructured
7
+ pinecone-client
8
+ pypdf
9
+ sentence_transformers
utils.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ from langchain.embeddings.openai import OpenAIEmbeddings
3
+ from langchain.vectorstores import Pinecone
4
+ from langchain.llms import OpenAI
5
+ from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
6
+ from langchain.schema import Document
7
+ import pinecone
8
+ from pypdf import PdfReader
9
+ from langchain.llms.openai import OpenAI
10
+ from langchain.chains.summarize import load_summarize_chain
11
+ from langchain import HuggingFaceHub
12
+
13
+
14
#Extract Information from PDF file
def get_pdf_text(pdf_doc):
    """Return the concatenated text of every page in *pdf_doc*.

    *pdf_doc* is anything pypdf's PdfReader accepts (path or file-like).
    """
    reader = PdfReader(pdf_doc)
    # Join page texts in order; equivalent to accumulating with +=.
    return "".join(page.extract_text() for page in reader.pages)
21
+
22
+
23
+
24
# iterate over files in
# that user uploaded PDF files, one by one
def create_docs(user_pdf_list, unique_id):
    """Build one langchain Document per uploaded PDF file.

    Args:
        user_pdf_list: iterable of uploaded file objects; each is expected
            to expose .name, .id, .type and .size (Streamlit
            UploadedFile-like — confirm against the caller).
        unique_id: session identifier stored in each document's metadata so
            later vector-store queries can filter to this upload batch.

    Returns:
        list of Document objects whose page_content is the full PDF text.
    """
    docs = []
    for filename in user_pdf_list:

        chunks = get_pdf_text(filename)

        # Adding items to our list - adding data & its metadata.
        # BUG FIX: the metadata key was previously misspelled "type="
        # instead of "type".
        docs.append(Document(
            page_content=chunks,
            metadata={
                "name": filename.name,
                "id": filename.id,
                "type": filename.type,
                "size": filename.size,
                "unique_id": unique_id,
            },
        ))

    return docs
39
+
40
+
41
#Create embeddings instance
def create_embeddings_load_data():
    """Return a sentence-transformers embedding model (all-MiniLM-L6-v2)."""
    return SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
46
+
47
+
48
#Function to push data to Vector Store - Pinecone here
def push_to_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, docs):
    """Embed *docs* and upsert them into the given Pinecone index.

    Args:
        pinecone_apikey: Pinecone API key (should come from the environment,
            never source control).
        pinecone_environment: Pinecone environment name, e.g. "us-west1-gcp-free".
        pinecone_index_name: name of the target Pinecone index.
        embeddings: embedding model used to vectorize the documents.
        docs: list of langchain Documents to store.
    """
    pinecone.init(
        api_key=pinecone_apikey,
        environment=pinecone_environment
    )
    # FIX: removed leftover debug output (print("done......2")).
    Pinecone.from_documents(docs, embeddings, index_name=pinecone_index_name)
57
+
58
+
59
+
60
#Function to pull information from Vector Store - Pinecone here
def pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings):
    """Connect to an existing Pinecone index and return a vector-store handle.

    Args:
        pinecone_apikey: Pinecone API key.
        pinecone_environment: Pinecone environment name.
        pinecone_index_name: name of an already-created index.
        embeddings: embedding model matching the index contents.

    Returns:
        A langchain Pinecone vector store wrapping the existing index.
    """
    pinecone.init(api_key=pinecone_apikey, environment=pinecone_environment)
    return Pinecone.from_existing_index(pinecone_index_name, embeddings)
72
+
73
+
74
+
75
#Function to help us get relevant documents from vector store - based on user input
def similar_docs(query, k, pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings, unique_id):
    """Return the *k* documents most similar to *query*, scoped to one session.

    Args:
        query: free-text search string (the job description).
        k: number of results to return; may arrive as a string (UI input)
            and is coerced with int() — raises ValueError if not numeric.
        pinecone_apikey: Pinecone API key.
        pinecone_environment: Pinecone environment name.
        pinecone_index_name: name of the index to search.
        embeddings: embedding model matching the index contents.
        unique_id: session id used as a metadata filter so only the current
            user's uploaded documents are searched.

    Returns:
        list of (Document, score) tuples from similarity_search_with_score.
    """
    # FIX: removed the duplicate pinecone.init() that used to live here —
    # pull_from_pinecone() already initializes the client with these same
    # credentials.
    index = pull_from_pinecone(pinecone_apikey, pinecone_environment, pinecone_index_name, embeddings)
    similar = index.similarity_search_with_score(query, int(k), {"unique_id": unique_id})
    return similar
89
+
90
+
91
# Helps us get the summary of a document
def get_summary(current_doc):
    """Summarize a single langchain Document using an OpenAI LLM.

    Uses a map_reduce summarization chain; returns the summary string.
    """
    llm = OpenAI(temperature=0)
    summarizer = load_summarize_chain(llm, chain_type="map_reduce")
    return summarizer.run([current_doc])
99
+
100
+
101
+
102
+
103
+