agentharbor committed on
Commit
80116c5
·
verified ·
1 Parent(s): 50bf3a8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google import genai
2
import os

# Read the Gemini API key from the environment instead of embedding it in the
# source file: a key committed to a repository is visible to every reader and
# must be treated as compromised (the previously committed key should be
# revoked and replaced).
_api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "Set the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable "
        "before starting the app."
    )

# Shared client used by every model call in this module.
client = genai.Client(api_key=_api_key)

# Model identifier passed to every generate_content request.
MODEL_ID = "gemini-2.0-flash-exp"
4
def model_response(text):
    """Send a prompt to the configured model and return the reply text.

    Args:
        text: Prompt forwarded as ``contents`` to
            ``client.models.generate_content``.

    Returns:
        The ``text`` attribute of the response object.
    """
    return client.models.generate_content(model=MODEL_ID, contents=text).text
11
+
12
def generate_dataset_queries(dataset_id, generated_glossary, schema_relationships):
    """Ask the model for data-exploration queries over a dataset.

    Args:
        dataset_id: Dataset reference interpolated into the prompt. Within
            this file ``run`` passes the generated schema text here.
        generated_glossary: Glossary text interpolated into the prompt.
        schema_relationships: Relationship text interpolated into the prompt.

    Returns:
        The model's reply as returned by ``model_response``.
    """
    prompt = f'''You are an expert in drafting BQ queries. Look at the dataset: {dataset_id}, look at the glossary: {generated_glossary} and {schema_relationships} and
recommend interesting data exploration queries. Format:
Query description in the form of a single line question
Actual query'''
    return model_response(prompt)
18
+
19
def generate_lookml(dataset_id, generated_glossary, schema_relationships):
    """Ask the model for a LookML semantic model matching a dataset.

    Args:
        dataset_id: Dataset reference interpolated into the prompt. Within
            this file ``run`` passes the generated schema text here.
        generated_glossary: Glossary text interpolated into the prompt.
        schema_relationships: Relationship text interpolated into the prompt.

    Returns:
        The model's reply as returned by ``model_response``.
    """
    prompt = f'''You are an expert in drafting LookML models. Look at the dataset: {dataset_id}, look at the glossary: {generated_glossary} and {schema_relationships} and
recommend the LookML semantic model corresponding to the dataset. '''
    return model_response(prompt)
23
+
24
def run(DATASET_ID):
    """Generate the dataset artefacts stage by stage, yielding after each one.

    Every yielded value is a 7-tuple laid out as
    ``(description, relationships, knowledge_graph, glossary, queries,
    lookml, lookml_explore)``. Slots for stages that have not run yet are
    ``None``; the last slot is never filled by this function.

    Args:
        DATASET_ID: Dataset name interpolated into the schema-generation
            prompt.

    Yields:
        The 7-tuple described above, once after each completed stage.

    Returns:
        The final 7-tuple (delivered as the generator's ``StopIteration``
        value).
    """
    # One slot per output; filled in progressively as the stages complete.
    results = [None] * 7

    # Stage 0: invent a schema, then summarise it.
    dataset = model_response(f'''You are an expert in BQ public datasets. Generate a dataset schema related to {DATASET_ID}. You need to come up with atleast 5 tables with each table
containing atleast 10 columns along with their descriptions.''')
    results[0] = model_response(f'''Generate a succinct 3-4 line description of the dataset: {dataset}.''')
    yield tuple(results)

    # Stage 1: relationships between the generated tables.
    relationships = model_response(f'''Based on the dataset provided: {dataset}, identify all the possible relationships
that exist between the tables in the dataset. Discover these relationships from
the point of view of data exploration.
Output:
List of relationships along with the description which is the business value of the relationship and a query
with description that validates the relationship.

Ensure that the column names and table names are accurate.''')
    results[1] = relationships
    yield tuple(results)

    # Stage 2: ASCII knowledge graph derived from the relationships.
    knowledge_graph = model_response(f'''Based on the context: {relationships}, generate a knowledge graph represented using ASCII art. Also generate a brief description of the graph.
Output:
Description of the graph listing all the relationships
ASCII version of the knowledge graph with nodes represented by tables and edges represented by the relationships. Edges should be annotated with the type of relationships identified - many-to-one, many-to-many, one-to-one, primary key, self joins, foreign keys etc''')
    results[2] = knowledge_graph
    yield tuple(results)

    # Stage 3: business glossary built from the graph and the short
    # description (not the full schema text).
    glossary = model_response(f'''Based on the relationships identified: {knowledge_graph}
and the dataset: {results[0]}, generate glossary terms that will help business users easily find the tables in the dataset.
## Task
- Your goal is to create a business glossary for the data in this dataset, aligned with the definition of business glossary specified above.
- Provide each business term in a newline, along with the definition.
- Include examples in the term definitions, wherever suitable.
- Make sure the business terms are relevant as per the table and column names and descriptions, and relevant to the domain to which the data belongs.
- Also include a few business terms around the users/clients and around 5 key metrics in the domain of the data.
- After defining the terms, identify the relationships between the business terms identified previously.

## Output format
- Output each business glossary term definition in a newline in the folowing format:
term: definition
- For the business terms which are the key metrics in the business domain, mark such terms by adding "[METRIC]" in the beginning of the line, in the following format:
[METRIC] term: definition
- Then print a header to indicate the end of this section and start of the relationships section.
- Then output the relationships between the business terms as follows:
term -> [related_term1, related_term2]
Show the relationship between the glossary term and the column broken down by each table.
''')
    results[3] = glossary
    yield tuple(results)

    # Stage 4: exploration queries (the full schema text is passed as the
    # "dataset id" argument).
    results[4] = generate_dataset_queries(dataset, glossary, knowledge_graph)
    yield tuple(results)

    # Stage 5: LookML model; slot 6 (LookML explore) stays None.
    results[5] = generate_lookml(dataset, glossary, knowledge_graph)
    final = tuple(results)
    yield final
    return final
81
+
82
# Adapter between the ``run`` generator and the Gradio interface.
def wrapper(dataset_id):
    """Stream the outputs of ``run`` in the shape the Gradio interface expects.

    ``run`` yields 7-tuples whose first element is the dataset description,
    which has no output component. The interface declares six output
    components, so six values must be produced on every update; yielding
    only five makes Gradio reject the result for having too few outputs.

    Args:
        dataset_id: Dataset name entered by the user; forwarded to ``run``.

    Yields:
        A 6-tuple: relationships, knowledge graph, glossary, queries,
        LookML model, LookML explore (``None`` for stages not yet produced).
    """
    for outputs in run(dataset_id):
        # Drop outputs[0] (description) and forward the remaining six slots,
        # including outputs[6] for the "LookML Explore" component.
        yield tuple(outputs[1:7])
92
+
93
+ import gradio as gr
94
+
95
# Example dataset names offered in the UI.
_EXAMPLE_DATASETS = [
    'ncaa_basketball2',
    'supply_chain_ashwins',
    'thelook_ecommerce',
    'CORTEX_SAP_CDC',
    'dt_kg_demo',
    'geo_openstreetmap',
    'google_political_ads',
    'noaa_historic_severe_storms',
    'stackoverflow',
]

# Output components, in the order ``wrapper`` yields its values. The first
# value is the relationship list and the second is the ASCII knowledge graph,
# so the labels follow that order (they were previously swapped).
iface = gr.Interface(
    fn=wrapper,
    inputs=gr.Textbox(label="Dataset ID"),
    outputs=[
        gr.Markdown(label="Schema Relationships"),
        gr.Markdown(label="Knowledge Graph"),
        gr.Markdown(label="Generated Glossary"),
        gr.Textbox(label="Queries"),
        gr.Markdown(label="LookML Model"),
        gr.Markdown(label="LookML Explore"),
    ],
    live=False,
    theme=gr.themes.Ocean(),
    title="BQ knowledge engine ⚙️💡📊 (Research preview)",
    description="Provide a dataset ID to generate LookML, schema relationships, glossary, and more, with live updates.",
    examples=_EXAMPLE_DATASETS,
)

# Launch the app
iface.launch(share=True, debug=True)