First CVchat app working version
Browse files- app.py +63 -0
- requirements.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import logging
|
| 3 |
+
from langchain.chains import RetrievalQA
|
| 4 |
+
from langchain.llms import OpenAI
|
| 5 |
+
from langchain.document_loaders import PDFMinerLoader
|
| 6 |
+
from langchain.indexes import VectorstoreIndexCreator
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
def set_openai_key(raw_key):
    """Store the user-supplied OpenAI API key in the process environment.

    The key is written to the ``OPENAI_API_KEY`` environment variable.

    Args:
        raw_key: The key as typed into the UI textbox. ``None`` is treated
            as an empty string, and surrounding whitespace (easy to pick up
            when pasting) is stripped before the key is stored.
    """
    key = (raw_key or "").strip()
    # The key is a secret: record that it changed, never its value.
    logging.info("OpenAI API key updated")
    os.environ["OPENAI_API_KEY"] = key
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def create_langchain(pdf_object):
    """Build a retrieval QA chain over the uploaded PDF.

    Args:
        pdf_object: The value delivered by the file-upload component. Its
            ``name`` attribute is passed to the PDF loader as the file
            path. ``None`` is tolerated (presumably what the UI sends when
            the upload is cleared -- confirm against the gradio version in
            use).

    Returns:
        A ``(chain, button_update)`` tuple. ``chain`` is the
        ``RetrievalQA`` chain, or ``None`` when no file is available.
        ``button_update`` enables the submit button when a chain was built
        and disables it otherwise.
    """
    if pdf_object is None:
        # No file to index: drop any previous chain and lock the button so
        # a question cannot be submitted against a missing document.
        return None, gr.Button.update(interactive=False)

    logging.info("Creating langchain for %s", pdf_object.name)
    loader = PDFMinerLoader(pdf_object.name)
    index_creator = VectorstoreIndexCreator()
    docsearch = index_creator.from_loaders([loader])
    chain = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        retriever=docsearch.vectorstore.as_retriever(),
        # Callers pass the query under "question" (see the dict built when
        # the chain is invoked), so the chain's input key must match.
        input_key="question",
        verbose=True,
        return_source_documents=True,
    )
    return chain, gr.Button.update(interactive=True)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def ask_question(chain, question_text):
    """Run ``question_text`` through ``chain`` and return its answer.

    Args:
        chain: A callable that accepts ``{"question": ...}`` and returns a
            mapping containing a ``"result"`` entry.
        question_text: The question to ask.

    Returns:
        The value stored under ``"result"`` in the chain's response.
    """
    logging.info(type(chain))
    payload = {"question": question_text}
    response = chain(payload)
    return response["result"]
|
| 33 |
+
|
| 34 |
+
def create_ask(pdf_object, question_text):
    """Index a PDF, build a one-off QA chain and answer a single question.

    Args:
        pdf_object: Object whose ``name`` attribute is the path of the PDF
            to load.
        question_text: The question to ask about the document.

    Returns:
        The value stored under ``"result"`` in the chain's response.
    """
    pdf_loader = PDFMinerLoader(pdf_object.name)
    index = VectorstoreIndexCreator().from_loaders([pdf_loader])
    qa_chain = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        retriever=index.vectorstore.as_retriever(),
        input_key="question",
        verbose=True,
        return_source_documents=True,
    )
    response = qa_chain({"question": question_text})
    return response["result"]
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# UI definition: token entry, PDF upload, question box and submit button.
with gr.Blocks() as demo:
    # The token is a secret, so render the field masked instead of echoing
    # it in clear text on screen.
    oai_token = gr.Textbox(
        label="OpenAI Token",
        placeholder="Lm-iIas452gaw3erGtPar26gERGSA5RVkFJQST23WEG524EWEl",
        type="password",
    )
    # Push every edit of the token field into the environment.
    oai_token.change(set_openai_key, oai_token)
    pdf_object = gr.File(
        label="Upload your CV in PDF format",
        file_count="single",
        type="file",
    )
    # Carries the QA chain produced by create_langchain over to ask_question.
    chain_state = gr.State()
    question_box = gr.Textbox(
        label="Question",
        value="Which are the candidate top hard skills? Enumerate in few bullet points.",
    )
    # Starts disabled; create_langchain returns the update that enables it.
    qa_button = gr.Button(value="Submit question", interactive=False)

    # A change of the uploaded file rebuilds the chain and updates the button.
    lchain = pdf_object.change(create_langchain, inputs=pdf_object, outputs=[chain_state, qa_button])
    qa_button.click(ask_question, inputs=[chain_state, question_box], outputs=gr.Textbox(label="Answer"))
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai
|
| 2 |
+
gradio
|
| 3 |
+
langchain
|
| 4 |
+
pdfminer.six
|
| 5 |
+
chromadb
|
| 6 |
+
tiktoken
|