| import ast
|
| import gradio as gr
|
| from functions import example_question_generator, chatbot_func
|
| from data_sources import connect_doc_db
|
| from utils import message_dict
|
|
|
def hide_info():
    """Return a Gradio update that makes the target component invisible.

    Wired to the submit button so the introductory description is hidden
    once the user starts a connection.
    """
    hidden = gr.update(visible=False)
    return hidden
|
|
|
with gr.Blocks() as demo:
    # Introductory notice; hidden by hide_info as soon as Submit is clicked.
    description = gr.HTML("""
    <!-- Header -->
    <div class="max-w-4xl mx-auto mb-12 text-center">
        <div class="bg-blue-50 border border-blue-200 rounded-lg max-w-2xl mx-auto">
            <p>This tool allows users to communicate with and query real time data from a Document DB (MongoDB for now, others can be added if requested) using natural
            language and the above features.</p>
            <p style="font-weight:bold;">Notice: the way this system is designed, no login information is retained and credentials are passed as session variables until the user leaves or
            refreshes the page in which they disappear. They are never saved to any files. I also make use of the PyMongoArrow aggregate_pandas_all function to apply pipelines,
            which can't delete, drop, or add database lines to avoid unhappy accidents or glitches.
            That being said, it's probably best to use caution when connecting to a production database to a strange AI tool with an unfamiliar author.
            This should be for demonstration purposes.</p>
            <p>Contact me if this is something you would like built in your organization, on your infrastructure, and with the requisite privacy and control a production
            database analytics tool requires.</p>
        </div>
    </div>
    """, elem_classes="description_component")

    status_message = gr.HTML(value='<p style="color:green;text-align:center;font-size:18px;">Please be patient while connecting as we need to generate '
                                   'and read a schema before connection can be successful. This process can take a few minutes.</p>', padding=False)

    # Connection form. The pre-filled values point at the demo cluster.
    # NOTE(review): the default password is committed in source; presumably a
    # deliberately public demo account - confirm before reusing this pattern.
    connection_string = gr.Textbox(label="Connection String", value="dataanalyst0.l1klmww.mongodb.net/")
    with gr.Row():
        connection_user = gr.Textbox(label="Connection User", value="virtual-data-analyst")
        connection_password = gr.Textbox(label="Connection Password", value="zcpbmoGJ3mC8o", type="password")
    doc_db_name = gr.Textbox(label="Database Name", value="sample_mflix")

    submit = gr.Button(value="Submit")
    submit.click(fn=hide_info, outputs=description)

    @gr.render(inputs=[connection_string, connection_user, connection_password, doc_db_name], triggers=[submit.click])
    def db_chat(request: gr.Request, connection_string=connection_string.value, connection_user=connection_user.value, connection_password=connection_password.value, doc_db_name=doc_db_name.value):
        """Connect to the document DB and render the chat UI for this session.

        Runs on every Submit click with the current form values. Builds the
        ``mongodb+srv://`` URI, calls ``process_doc_db`` and shows its status
        HTML. On success it also renders hidden state fields, the collection
        list and a ``gr.ChatInterface`` backed by ``chatbot_func``.

        Args:
            request: Gradio request; its ``session_hash`` keys ``message_dict``.
            connection_string: Host part of the URI (everything after ``@``).
            connection_user: Raw (unescaped) database user name.
            connection_password: Raw (unescaped) database password.
            doc_db_name: Name of the database to query.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote_plus

        session_id = request.session_hash
        if session_id not in message_dict:
            message_dict[session_id] = {}
        # Reset this session's doc-db entry on every (re)connection.
        message_dict[session_id]['doc_db'] = None

        # Credentials must be percent-encoded: a raw '@', ':', '/' or '%' in
        # the user name or password would otherwise corrupt the URI.
        connection_login_value = (
            f"mongodb+srv://{quote_plus(connection_user)}:"
            f"{quote_plus(connection_password)}@{connection_string}"
        )

        # The former `if connection_login_value:` guard was always true (the
        # string always starts with the scheme), so it has been removed.
        print("MONGO APP")
        # process_message is indexed as: [0] status, [1] status HTML,
        # [2] collection titles, [3] schema.
        process_message = process_doc_db(connection_login_value, doc_db_name, session_id)
        gr.HTML(value=process_message[1], padding=False)
        if process_message[0] != "success":
            return

        if "dataanalyst0.l1klmww.mongodb.net" in connection_login_value:
            # Curated questions for the bundled demo database.
            example_questions = [
                ["Describe the dataset"],
                ["What are the top 5 most common movie genres?"],
                ["How do user comment counts on a movie correlate with the movie award wins?"],
                ["Can you generate a pie chart showing the top 10 states with the most movie theaters?"],
                ["What are the top 10 most represented directors in the database?"],
                ["What are the different movie categories and how many movies are in each category?"],
            ]
        else:
            try:
                # NOTE(review): 'graphql' is passed as the data-source type for
                # a document DB; looks like a copy-paste - confirm whether
                # 'doc_db' is intended.
                generated_examples = ast.literal_eval(
                    example_question_generator(session_id, 'graphql', doc_db_name, process_message[2], process_message[3])
                )
                example_questions = [["Describe the dataset"]] + [[example] for example in generated_examples]
            except Exception as e:
                # Best effort: question generation must never block the chat,
                # so fall back to generic prompts.
                print("DOC DB QUESTION GENERATION ERROR")
                print(e)
                example_questions = [
                    ["Describe the dataset"],
                    ["List the columns in the dataset"],
                    ["What could this data be used for?"],
                ]

        # Hidden fields carry per-session state into chatbot_func as
        # additional inputs.
        session_hash = gr.Textbox(visible=False, value=session_id)
        db_connection_string = gr.Textbox(visible=False, value=connection_login_value)
        db_name = gr.Textbox(visible=False, value=doc_db_name)
        titles = gr.Textbox(value=process_message[2], interactive=False, label="DB Collections")
        data_source = gr.Textbox(visible=False, value='doc_db')
        schema = gr.Textbox(visible=False, value=process_message[3])

        bot = gr.Chatbot(type='messages', label="DocDB Chat Window", render_markdown=True, sanitize_html=False, show_label=True, render=False, visible=True, elem_classes="chatbot")
        gr.ChatInterface(
            fn=chatbot_func,
            type='messages',
            chatbot=bot,
            title="Chat with your Database",
            examples=example_questions,
            concurrency_limit=None,
            additional_inputs=[session_hash, data_source, titles, schema, db_connection_string, db_name],
        )
|
|
|
def process_doc_db(connection_string, nosql_db_name, session_hash):
    """Connect to the document database when a connection string is given.

    Delegates to ``connect_doc_db`` and returns its result unchanged.
    Returns ``None`` without attempting a connection when
    ``connection_string`` is empty or otherwise falsy.
    """
    if not connection_string:
        return None
    return connect_doc_db(connection_string, nosql_db_name, session_hash)
|
|
|
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()