# Document_QnA / app.py
# (Hugging Face Space: savi-cyber — commit 49a9335, verified: "Update app.py")
## Setup
# Import the necessary Libraries
import os
import uuid
import json
import pandas as pd
import gradio as gr
from huggingface_hub import CommitScheduler, HfApi
from pathlib import Path
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from openai import OpenAI
from datetime import datetime
# --- Credentials ---------------------------------------------------------
# Secrets are read from environment variables (Hugging Face Space secrets).
hf_token = os.getenv("HF_TOKEN1")          # HF write token for the log dataset repo
openai_api = os.getenv("MIT_Project_key")  # OpenAI API key

# OpenAI client used by predict() below.
client = OpenAI(
    api_key=openai_api
)

# Embedding model — must be the same model the persisted vector DB was
# built with, or similarity search results will be meaningless.
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
persisted_vectordb_location = './companies_db/'

# Load the persisted Chroma vector store of 10-K filing chunks.
vectorstore_persisted = Chroma(
    collection_name="companies_10k_2023",
    persist_directory=persisted_vectordb_location,
    embedding_function=embedding_model
)

# --- Logging -------------------------------------------------------------
# Each app instance appends to its own uniquely-named JSONL file so that
# concurrent replicas do not clobber each other's logs.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# CommitScheduler pushes the local log folder to the Hub every 2 minutes
# (the `every` parameter is in minutes).
# NOTE(review): repo_id "Project3" has no namespace — confirm it resolves to
# the intended dataset repo for the owner of hf_token.
scheduler = CommitScheduler(
    repo_id="Project3",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
    token=hf_token
)
# --- Prompt templates ----------------------------------------------------
# System message: instructs the model to answer strictly from the retrieved
# quotes and to cite the supporting quote and its page number.
qna_system_message = """
System Message:
You are provided with a set of quotes and a question. Your task is to:
Answer the Question: Use the information from the quotes to provide a concise and accurate answer to the question.
Quote Source: Select a relevant quote that supports your answer.
Document Page Number: Indicate the page number from which the quote is taken.
Input:
Quotes: [List of quotes from the document]
Question: [The question to be answered]
Output:
Answer: [Provide the answer to the question here]
Quote: [Select and present the relevant quote here]
Document Page Number: [Specify the page number of the quote]"""

# User message template; filled per request with the retrieved chunk context
# and the user's question (str.format placeholders: {context}, {question}).
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question mentioned below.
{context}
###Question
{question}
"""
# Define the predict function that runs when 'Submit' is clicked or when a API request is made
def predict(user_input, company):
    """Answer a user question about a company's 2023 10-K filing.

    Runs a similarity search over the persisted vector store, restricted to
    the selected company's PDF, builds a prompt from the retrieved chunks,
    queries the chat model, logs the interaction, and returns the answer.

    Parameters
    ----------
    user_input : str
        The question typed by the user.
    company : str
        Company name from the dropdown; must match the PDF file-name stem
        used when the vector DB was built (e.g. "Meta" -> "Meta-10-k-2023.pdf").

    Returns
    -------
    str
        The model's answer, or an apology message if the API call failed.
    """
    # Renamed from 'filter' — the original shadowed the builtin filter().
    # (The `filter=` keyword below is the similarity_search API parameter.)
    source_pdf = "/content/dataset/" + company + "-10-k-2023.pdf"
    relevant_document_chunks = vectorstore_persisted.similarity_search(
        user_input, k=5, filter={"source": source_pdf}
    )

    # Assemble the retrieved chunks into one context string; a single join
    # instead of repeated += concatenation.
    context_parts = []
    for i, doc in enumerate(relevant_document_chunks, start=1):
        context_parts.append(f"Retrieved chunk {i}: \n")
        context_parts.append(doc.page_content + "\n")
        context_parts.append("Source: " + source_pdf + "\n")
        context_parts.append("Page Number: " + str(doc.metadata['page']) + "\n")
    context_for_query = "".join(context_parts)

    # Chat messages: fixed system instructions + templated user turn.
    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {'role': 'user', 'content': qna_user_message_template.format(
            context=context_for_query,
            question=user_input
        )}
    ]

    # Query the chat model; temperature=0 for deterministic answers.
    try:
        response = client.chat.completions.create(
            model='gpt-3.5-turbo',
            messages=prompt,
            temperature=0
        )
        prediction = response.choices[0].message.content
    except Exception as e:
        # Surface the error to the user instead of crashing the UI.
        prediction = f'Sorry, I encountered the following error: \n {e}'

    # Log inputs and outputs — including error responses (the original
    # returned early inside the except block, so failures were never logged).
    # The scheduler lock keeps a Hub commit from reading a half-written line.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction
                }
            ))
            f.write("\n")

    return prediction
# Companies offered in the dropdown.
# NOTE(review): each entry must match a PDF file-name stem indexed in the
# vector DB (predict() filters on "/content/dataset/<company>-10-k-2023.pdf");
# the mixed casing ("Meta" vs "google"/"msft"/"aws") presumably mirrors the
# actual file names — verify against the dataset.
companies = ["Meta", "IBM", "google", "msft", "aws"]

# --- Gradio UI -----------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Row():
        # Company selector — passed to predict() as the 'company' argument.
        dropdown = gr.Dropdown(
            choices=companies,
            label='Company_file'
        )
        # Free-text question — passed to predict() as 'user_input'.
        textbox = gr.Textbox(
            label='Enter your query',
            placeholder='Type your query here'
        )
        magic_button = gr.Button("Get Answer")
    with gr.Row():
        # Output box for the model's answer.
        magic_sauce = gr.Textbox(
            label="Answer",
            placeholder="Your magic sauce will be displayed here"
        )
    # Wire the button: predict(textbox, dropdown) -> magic_sauce.
    # (Must be registered inside the Blocks context.)
    magic_button.click(
        predict,
        inputs=[textbox, dropdown],
        outputs=[magic_sauce]
    )

# share=True exposes a public link; show_error/debug surface tracebacks in the UI.
demo.launch(share=True, show_error=True, debug=True)