# RAG Q&A Gradio app over company 10-K (2023) filings, deployed as a
# Hugging Face Space. (The "Spaces: Sleeping" lines were page-scrape residue.)
## Setup
# Import the necessary libraries

# Standard library
import json
import os
import uuid
from datetime import datetime
from pathlib import Path

# Third-party
import gradio as gr
import pandas as pd
from huggingface_hub import CommitScheduler, HfApi
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from openai import OpenAI
# Read credentials from the environment (configured as Space secrets).
hf_token = os.getenv("HF_TOKEN1")
openai_api = os.getenv("MIT_Project_key")

# Client used for chat completions in predict().
client = OpenAI(
    api_key=openai_api
)

# Define the embedding model and the vectorstore.
# NOTE(review): this must be the same embedding model that was used to
# build the persisted DB, or similarity search results will be garbage.
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
persisted_vectordb_location = './companies_db/'

# Load the persisted vectorDB holding the 2023 10-K document chunks.
vectorstore_persisted = Chroma(
    collection_name="companies_10k_2023",
    persist_directory=persisted_vectordb_location,
    embedding_function=embedding_model,
)

# Prepare the logging functionality: one uniquely-named JSON-lines file
# per app session, stored under logs/.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
# Ensure the folder exists before predict() first appends to log_file.
log_folder.mkdir(parents=True, exist_ok=True)

# Background scheduler that commits the log folder to a HF dataset repo
# every 2 minutes; predict() takes scheduler.lock while writing logs.
scheduler = CommitScheduler(
    repo_id="Project3",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
    token=hf_token,
)
# System prompt for the Q&A model: answer strictly from the supplied
# quotes and cite the supporting quote plus its page number.
qna_system_message = """
System Message:
You are provided with a set of quotes and a question. Your task is to:
Answer the Question: Use the information from the quotes to provide a concise and accurate answer to the question.
Quote Source: Select a relevant quote that supports your answer.
Document Page Number: Indicate the page number from which the quote is taken.
Input:
Quotes: [List of quotes from the document]
Question: [The question to be answered]
Output:
Answer: [Provide the answer to the question here]
Quote: [Select and present the relevant quote here]
Document Page Number: [Specify the page number of the quote]"""

# User message template; predict() fills {context} with the retrieved
# chunks and {question} with the user's query.
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question mentioned below.
{context}
###Question
{question}
"""
# Runs when 'Submit' is clicked or when an API request is made.
def predict(user_input, company):
    """Answer a user question about the selected company's 2023 10-K filing.

    Retrieves the top-5 most similar chunks for the company's filing from
    the persisted vector store, asks the LLM to answer using those chunks,
    logs the interaction, and returns the model's answer.

    Parameters
    ----------
    user_input : str
        The question typed by the user.
    company : str
        Company identifier chosen in the dropdown; interpolated into the
        source-file path used to filter the vector store.

    Returns
    -------
    str
        The LLM's answer, or an apology string describing the API error.
    """
    # Metadata filter restricting search to the chosen company's PDF.
    # NOTE(review): the "/content/dataset/" prefix must match the `source`
    # metadata stored when the DB was built — confirm if the DB is rebuilt.
    # (Renamed from `filter`, which shadowed the builtin.)
    source_path = "/content/dataset/" + company + "-10-k-2023.pdf"
    relevant_document_chunks = vectorstore_persisted.similarity_search(
        user_input, k=5, filter={"source": source_path}
    )

    # Assemble the retrieved chunks (with provenance) into one context
    # string; join once instead of repeated string concatenation.
    context_parts = []
    for i, doc in enumerate(relevant_document_chunks):
        context_parts.append(f"Retrieved chunk {i+1}: \n")
        context_parts.append(doc.page_content + "\n")
        context_parts.append("Source: " + source_path + "\n")
        context_parts.append("Page Number: " + str(doc.metadata['page']) + "\n")
    context_for_query = "".join(context_parts)

    # Create the chat messages: fixed system prompt + templated user turn.
    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {'role': 'user', 'content': qna_user_message_template.format(
            context=context_for_query,
            question=user_input,
        )},
    ]

    # Get the response from the LLM; surface any API failure as the answer
    # text instead of crashing the UI.
    try:
        response = client.chat.completions.create(
            model='gpt-3.5-turbo',
            messages=prompt,
            temperature=0,
        )
        prediction = response.choices[0].message.content
    except Exception as e:
        prediction = f'Sorry, I encountered the following error: \n {e}'
        # Fix: no early return here — previously errors skipped logging.

    # Log both the inputs and outputs to the local log file, holding the
    # commit scheduler's lock to avoid a commit reading a partial line.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction,
                }
            ))
            f.write("\n")

    return prediction
# Companies offered in the dropdown. These values are interpolated into
# the source-file filter inside predict(), so they must match the file
# names used when the vector store was built.
companies = ["Meta", "IBM", "google", "msft", "aws"]

# Assemble the UI: company picker + query box + button on top, answer below.
with gr.Blocks() as demo:
    with gr.Row():
        dropdown = gr.Dropdown(
            choices=companies,
            label='Company_file',
        )
        textbox = gr.Textbox(
            label='Enter your query',
            placeholder='Type your query here',
        )
        magic_button = gr.Button("Get Answer")
    with gr.Row():
        magic_sauce = gr.Textbox(
            label="Answer",
            placeholder="Your magic sauce will be displayed here",
        )
    # Wire the button: predict(query, company) -> answer box.
    magic_button.click(
        predict,
        inputs=[textbox, dropdown],
        outputs=[magic_sauce],
    )

# For the inputs parameter of Interface provide [textbox, company]
demo.launch(share=True, show_error=True, debug=True)