| import pandas as pd | |
| from openai import OpenAI | |
| import os | |
| from google.cloud import bigquery | |
| import numpy as np | |
| import gradio as gr | |
| project_id = os.getenv('project_id') | |
| dataset_id = os.getenv('dataset_id') | |
| table_id = os.getenv('table_id') | |
| openai_client = OpenAI() | |
| def fetch_table_schema(project_id, dataset_id, table_id): | |
| bqclient = bigquery.Client(project=project_id) | |
| table_ref = f"{project_id}.{dataset_id}.{table_id}" | |
| table = bqclient.get_table(table_ref) | |
| schema_dict = {} | |
| for schema_field in table.schema: | |
| schema_dict[schema_field.name] = schema_field.field_type | |
| return schema_dict | |
| def get_sql_query(description): | |
| prompt = f''' | |
| Generate the SQL query for the following task:\n{description}.\n | |
| The database you need is called {dataset_id} and the table is called {table_id}. | |
| Use the format {dataset_id}.{table_id} as the table name in the queries. | |
| Enclose column names in backticks(`) not quotation marks. | |
| Do not assign aliases to the columns. | |
| Do not calculate new columns, unless specifically called to. | |
| Return only the SQL query, nothing else. | |
| Do not use WITHIN GROUP clause. | |
| \nThe list of all the columns is as follows: {schema} /n | |
| ''' | |
| try: | |
| completion = openai_client.chat.completions.create( | |
| model='gpt-4o', | |
| messages = [ | |
| {"role": "system", "content": "You are an expert Data Scientist with in-depth knowledge of SQL, working on Network Telemetry Data."}, | |
| {"role": "user", "content": f'{prompt}'}, | |
| ] | |
| ) | |
| sql_query = completion.choices[0].message.content.strip().split('```sql')[1].split('```')[0] | |
| except Exception as e: | |
| print(f'The following error ocurred: {e}\n') | |
| sql_query = None | |
| return sql_query | |
| schema = fetch_table_schema(project_id, dataset_id, table_id) | |
| def execute_sql_query(query): | |
| client = bigquery.Client() | |
| try: | |
| result = client.query(query).to_dataframe() | |
| message = f'The query:{query} was successfully executed.' | |
| except Exception as e: | |
| result = None | |
| message = f'The query:{query} could not be executed due to the following exception:\n{e}' | |
| return result, message | |
| def echo(text): | |
| query = get_sql_query(text) | |
| if query is None: | |
| return 'No query generated', 'No query generated' | |
| result, message = execute_sql_query(query) | |
| return result, message | |
| def gradio_interface(text): | |
| result, message = echo(text) | |
| if isinstance(result, pd.DataFrame): | |
| return gr.Dataframe(value=result), message | |
| else: | |
| return result, message | |
| def gradio_interface(text): | |
| result, message = echo(text) | |
| if isinstance(result, pd.DataFrame): | |
| return gr.Dataframe(value=result), message | |
| else: | |
| return result, message | |
| demo = gr.Blocks( | |
| title="Text-to-SQL", | |
| theme='remilia/ghostly', | |
| ) | |
| with demo: | |
| gr.Markdown( | |
| ''' | |
| # <p style="text-align: center;">Text to SQL Query Engine</p> | |
| <p style="text-align: center;"> | |
| Welcome to our Text2SQL Engine. | |
| <br> | |
| Enter your query in natural language and we'll convert it to SQL and return the result to you. | |
| </p> | |
| ''' | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| text_input = gr.Textbox(label="Enter your query") | |
| button = gr.Button("Submit") | |
| gr.Examples([ | |
| 'Find the correlation between RTT and Jitter for each Market', | |
| 'Find the variance in Jitter for each 5G_Reliability_Category', | |
| 'Find the count of records per 5G_Reliability_Category where 5G_Reliability_Value is below the average for the category', | |
| 'Calculate the standard deviation of 5G_Reliability_Score for each Network_Engineer', | |
| 'Determine the Sector with the highest variance in 5G Reliability Value and its corresponding average Context Drop Percent' | |
| ], | |
| inputs=[text_input] | |
| ) | |
| with gr.Column(scale=3): | |
| output_text = gr.Textbox(label="Output", interactive=False) | |
| output_df = gr.Dataframe(interactive=False) | |
| def update_output(text): | |
| result, message = gradio_interface(text) | |
| if result and isinstance(result, pd.DataFrame): | |
| return result, message, gr.update(visible=True) | |
| else: | |
| return result, message, gr.update(visible=False) | |
| button.click(update_output, inputs=text_input, outputs=[output_df, output_text]) | |
| demo.launch(debug=True, auth=("admin", "Text2SQL")) |