Spaces:

agentharbor
/

bke

Runtime error

App Files Files Community

agentharbor committed on Jan 28, 2025

Commit

80116c5

verified ·

1 Parent(s): 50bf3a8

Create app.py

Browse files

Files changed (1) hide show

app.py +112 -0

app.py ADDED Viewed

	@@ -0,0 +1,112 @@

+from google import genai
+client = genai.Client(api_key="AIzaSyD6voSAiSUim17kB90skpdisMMyFXZPxMo")
+MODEL_ID = "gemini-2.0-flash-exp"
+def model_response(text):
+    response = client.models.generate_content(
+    model=MODEL_ID,
+    contents=text
+)
+    return response.text
+def generate_dataset_queries(dataset_id,generated_glossary, schema_relationships):
+  queries = model_response(f'''You are an expert in drafting BQ queries. Look at the dataset: {dataset_id}, look at the glossary: {generated_glossary} and {schema_relationships} and
+  recommend interesting data exploration queries. Format:
+  Query description in the form of a single line question
+  Actual query''')
+  return queries
+def generate_lookml(dataset_id,generated_glossary, schema_relationships):
+  lookml = model_response(f'''You are an expert in drafting LookML models. Look at the dataset: {dataset_id}, look at the glossary: {generated_glossary} and {schema_relationships} and
+  recommend the LookML semantic model corresponding to the dataset. ''')
+  return lookml
+def run(DATASET_ID):
+  dataset_description = None
+  py = None
+  schema_relations = None
+  generated_glossary = None
+  queries = None
+  lookml = None
+  lookml_explore = None
+  dataset = model_response(f'''You are an expert in BQ public datasets. Generate a dataset schema related to {DATASET_ID}. You need to come up with atleast 5 tables with each table
+containing atleast 10 columns along with their descriptions.''')
+  dataset_description = model_response(f'''Generate a succinct 3-4 line description of the dataset: {dataset}.''')
+  yield dataset_description, None, None, None, None, None, None
+  #get_table_doc(PROJECT_ID, DATASET_ID)
+  py = model_response(f'''Based on the dataset provided: {dataset}, identify all the possible relationships
+    that exist between the tables in the dataset. Discover these relationships from
+    the point of view of data exploration.
+    Output:
+    List of relationships along with the description which is the business value of the relationship and a query
+    with description that validates the relationship.
+    Ensure that the column names and table names are accurate.''')
+  yield dataset_description, py, schema_relations, None, None, None, None
+  schema_relations = model_response(f'''Based on the context: {py}, generate a knowledge graph represented using ASCII art. Also generate a brief description of the graph.
+    Output:
+    Description of the graph listing all the relationships
+    ASCII version of the knowledge graph with nodes represented by tables and edges represented by the relationships. Edges should be annotated with the type of relationships identified - many-to-one, many-to-many, one-to-one, primary key, self joins, foreign keys etc''')
+  yield dataset_description, py, schema_relations, None, None, None, None
+  generated_glossary = model_response(f'''Based on the relationships identified: {schema_relations}
+    and the dataset: {dataset_description}, generate glossary terms that will help business users easily find the tables in the dataset.
+    ## Task
+  - Your goal is to create a business glossary for the data in this dataset, aligned with the definition of business glossary specified above.
+  - Provide each business term in a newline, along with the definition.
+  - Include examples in the term definitions, wherever suitable.
+  - Make sure the business terms are relevant as per the table and column names and descriptions, and relevant to the domain to which the data belongs.
+  - Also include a few business terms around the users/clients and around 5 key metrics in the domain of the data.
+  - After defining the terms, identify the relationships between the business terms identified previously.
+  ## Output format
+  - Output each business glossary term definition in a newline in the folowing format:
+  term: definition
+  - For the business terms which are the key metrics in the business domain, mark such terms by adding "[METRIC]" in the beginning of the line, in the following format:
+  [METRIC] term: definition
+  - Then print a header to indicate the end of this section and start of the relationships section.
+  - Then output the relationships between the business terms as follows:
+  term -> [related_term1, related_term2]
+    Show the relationship between the glossary term and the column broken down by each table.
+    ''')
+  yield dataset_description, py, schema_relations, generated_glossary, None, None, None
+  queries = generate_dataset_queries(dataset, generated_glossary, schema_relations)
+  yield dataset_description, py, schema_relations, generated_glossary, queries, None, None
+  lookml = generate_lookml(dataset, generated_glossary, schema_relations)
+  yield dataset_description, py, schema_relations, generated_glossary, queries, lookml, lookml_explore
+  return dataset_description, py, schema_relations, generated_glossary, queries, lookml, lookml_explore
+# Modify the wrapper function to yield a tuple for Gradio outputs
+def wrapper(dataset_id):
+    for outputs in run(dataset_id):
+        yield (
+            outputs[1],
+            outputs[2],  # Schema Relationships
+            outputs[3],  # Generated Glossary
+            outputs[4],  # Queries
+            outputs[5]  # LookML Model
+        )
+import gradio as gr
+iface = gr.Interface(
+        fn=wrapper,
+        inputs=gr.Textbox(label="Dataset ID"),
+        outputs=[
+            gr.Markdown(label="Knowledge Graph"),
+            gr.Markdown(label="Schema Relationships"),
+            gr.Markdown(label="Generated Glossary"),
+            gr.Textbox(label="Queries"),
+            gr.Markdown(label="LookML Model"),
+            gr.Markdown(label="LookML Explore"),
+        ],
+        live=False,
+        theme = gr.themes.Ocean(),
+        title="BQ knowledge engine ⚙️💡📊 (Research preview)",
+        description="Provide a dataset ID to generate LookML, schema relationships, glossary, and more, with live updates.", examples=['ncaa_basketball2', 'supply_chain_ashwins','thelook_ecommerce','CORTEX_SAP_CDC','dt_kg_demo','geo_openstreetmap','google_political_ads','noaa_historic_severe_storms','stackoverflow']
+    )
+# Launch the app
+iface.launch(share=True, debug=True)