Spaces:

nolanzandi
/

virtual-data-analyst

Running

App Files Files Community

virtual-data-analyst / templates /doc_db.py

nolanzandi

refactor chat functions (#39)

e448d98 verified 12 months ago

raw

history blame

6.95 kB

	import ast
	import gradio as gr
	from functions import example_question_generator, chatbot_func
	from data_sources import connect_doc_db
	from utils import message_dict

	def hide_info():
	    """Return a Gradio update that makes the receiving component invisible.

	    Used as the ``fn`` of an event listener; whichever component is listed
	    in that listener's ``outputs`` gets ``visible=False`` applied.
	    """
	    hidden = gr.update(visible=False)
	    return hidden

	# Gradio layout for the Document DB (MongoDB) page. The Blocks app is bound
	# to `demo`, which the __main__ guard at the bottom of the file launches.
	with gr.Blocks() as demo:
	# Introductory notice. It is the output of the submit.click(fn=hide_info, ...)
	# listener registered below, so it is hidden once Submit is clicked.
	description = gr.HTML("""
	<!-- Header -->
	<div class="max-w-4xl mx-auto mb-12 text-center">
	<div class="bg-blue-50 border border-blue-200 rounded-lg max-w-2xl mx-auto">
	<p>This tool allows users to communicate with and query real time data from a Document DB (MongoDB for now, others can be added if requested) using natural
	language and the above features.</p>
	<p style="font-weight:bold;">Notice: the way this system is designed, no login information is retained and credentials are passed as session variables until the user leaves or
	refreshes the page in which they disappear. They are never saved to any files. I also make use of the PyMongoArrow aggregate_pandas_all function to apply pipelines,
	which can't delete, drop, or add database lines to avoid unhappy accidents or glitches.
	That being said, it's probably best to use caution when connecting to a production database to a strange AI tool with an unfamiliar author.
	This should be for demonstration purposes.</p>
	<p>Contact me if this is something you would like built in your organization, on your infrastructure, and with the requisite privacy and control a production
	database analytics tool requires.</p>
	</div>
	</div>
	""", elem_classes="description_component")

	# Static message warning that connecting can take a few minutes.
	status_message = gr.HTML(value='<p style="color:green;text-align:center;font-size:18px;">Please be patient while connecting as we need to generate '
	'and read a schema before connection can be successful. This process can take a few minutes.</p>', padding=False)

	# Connection form. Every field is pre-filled with a default value.
	connection_string = gr.Textbox(label="Connection String", value="dataanalyst0.l1klmww.mongodb.net/")
	with gr.Row():
	connection_user = gr.Textbox(label="Connection User", value="virtual-data-analyst")
	# NOTE(review): a default password is committed in source; presumably a
	# read-only demo account — confirm before reusing this pattern.
	connection_password = gr.Textbox(label="Connection Password", value="zcpbmoGJ3mC8o", type="password")
	doc_db_name = gr.Textbox(label="Database Name", value="sample_mflix")

	submit = gr.Button(value="Submit")
	# First click listener: hide the introductory notice.
	submit.click(fn=hide_info, outputs=description)

	# Second consumer of the same click: gr.render is given submit.click as its
	# trigger and the four form textboxes as its inputs.
	@gr.render(inputs=[connection_string,connection_user,connection_password,doc_db_name], triggers=[submit.click])
	def db_chat(request: gr.Request, connection_string=connection_string.value, connection_user=connection_user.value, connection_password=connection_password.value, doc_db_name=doc_db_name.value):
	"""Connect to the database and, on success, build the chat UI for this session.

	Args:
	    request: Gradio request; its ``session_hash`` keys ``message_dict`` and
	        is forwarded to the chat function.
	    connection_string: Host portion placed after ``@`` in the URI.
	    connection_user: User name placed before ``:`` in the URI.
	    connection_password: Password placed between ``:`` and ``@`` in the URI.
	    doc_db_name: Database name handed to ``process_doc_db``.

	The parameter defaults are the initial ``.value`` of the form textboxes.
	"""
	# Make sure this session has an entry in the shared message_dict and set
	# its 'doc_db' slot to None.
	if request.session_hash not in message_dict:
	message_dict[request.session_hash] = {}
	message_dict[request.session_hash]['doc_db'] = None
	# Build the mongodb+srv URI by plain string concatenation.
	# NOTE(review): user and password are not percent-escaped here. PyMongo
	# requires reserved characters in them to be escaped (urllib.parse.quote_plus)
	# — confirm whether connect_doc_db takes care of that.
	connection_login_value = "mongodb+srv://" + connection_user + ":" + connection_password + "@" + connection_string
	# NOTE(review): this check is always true, because the string above always
	# starts with the non-empty "mongodb+srv://" literal.
	if connection_login_value:
	print("MONGO APP")
	# The result is indexed below as: [0] status string, [1] HTML message,
	# [2] value shown as "DB Collections", [3] schema.
	process_message = process_doc_db(connection_login_value, doc_db_name, request.session_hash)
	# Show the connection outcome message.
	gr.HTML(value=process_message[1], padding=False)
	# On success, pick the example questions offered in the chat.
	if process_message[0] == "success":
	# The default demo host gets a fixed, hand-written list of questions.
	if "dataanalyst0.l1klmww.mongodb.net" in connection_login_value:
	example_questions = [
	["Describe the dataset"],
	["What are the top 5 most common movie genres?"],
	["How do user comment counts on a movie correlate with the movie award wins?"],
	["Can you generate a pie chart showing the top 10 states with the most movie theaters?"],
	["What are the top 10 most represented directors in the database?"],
	["What are the different movie categories and how many movies are in each category?"]
	]
	else:
	# Any other host: ask example_question_generator for questions and parse
	# its output as a Python literal, then wrap each one in its own list.
	# NOTE(review): the second argument is 'graphql' while this template uses
	# 'doc_db' as its data_source below — confirm this is intended.
	try:
	generated_examples = ast.literal_eval(example_question_generator(request.session_hash, 'graphql', doc_db_name, process_message[2], process_message[3]))
	example_questions = [
	["Describe the dataset"]
	]
	for example in generated_examples:
	example_questions.append([example])
	# Generation or parsing failed: print the error and fall back to
	# generic questions so the chat can still be built.
	except Exception as e:
	print("DOC DB QUESTION GENERATION ERROR")
	print(e)
	example_questions = [
	["Describe the dataset"],
	["List the columns in the dataset"],
	["What could this data be used for?"],
	]
	# Textboxes that carry per-session context into chatbot_func through
	# additional_inputs; all are hidden except the read-only collections box.
	session_hash = gr.Textbox(visible=False, value=request.session_hash)
	db_connection_string = gr.Textbox(visible=False, value=connection_login_value)
	db_name = gr.Textbox(visible=False, value=doc_db_name)
	titles = gr.Textbox(value=process_message[2], interactive=False, label="DB Collections")
	data_source = gr.Textbox(visible=False, value='doc_db')
	schema = gr.Textbox(visible=False, value=process_message[3])
	# The Chatbot is created unrendered (render=False) and handed to the
	# ChatInterface through its chatbot= argument.
	# NOTE(review): sanitize_html=False turns off HTML sanitization of chat
	# messages — confirm this is required.
	bot = gr.Chatbot(type='messages', label="DocDB Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
	chat = gr.ChatInterface(
	fn=chatbot_func,
	type='messages',
	chatbot=bot,
	title="Chat with your Database",
	examples=example_questions,
	concurrency_limit=None,
	additional_inputs=[session_hash, data_source, titles, schema, db_connection_string, db_name]
	)

	def process_doc_db(connection_string, nosql_db_name, session_hash):
	    """Connect to a document database and return the connection result.

	    Thin wrapper around ``connect_doc_db`` that skips the call entirely when
	    no connection string is supplied.

	    Args:
	        connection_string: Full connection URI for the database.
	        nosql_db_name: Name of the database to connect to.
	        session_hash: Identifier of the calling session, forwarded unchanged.

	    Returns:
	        Whatever ``connect_doc_db`` returns, or ``None`` when
	        ``connection_string`` is empty or ``None``. The caller in this file
	        indexes the result as (status, HTML message, collection titles,
	        schema) — presumably that is the shape ``connect_doc_db`` produces;
	        verify against its definition.
	    """
	    if not connection_string:
	        # Nothing to connect to: make the "no result" outcome explicit
	        # instead of silently falling off the end of the function.
	        return None
	    return connect_doc_db(connection_string, nosql_db_name, session_hash)

	# Start the Gradio app only when this module is executed directly,
	# not when it is imported by another module.
	if __name__ == "__main__":
	    demo.launch()