Spaces:

Osman2010
/

lida

Sleeping

App Files Files Community

Victor Dibia committed on Aug 14, 2023

Commit

61358ce

1 Parent(s): 8d3cc3c

recommender update

Browse files

Files changed (6) hide show

MANIFEST.in +3 -1
lida/modules/executor.py +2 -0
lida/modules/manager.py +2 -0
lida/modules/scaffold.py +1 -1
lida/modules/viz/vizrecommender.py +29 -7
notebooks/tutorial.ipynb +0 -0

MANIFEST.in CHANGED Viewed

@@ -1,2 +1,4 @@
 recursive-include  lida/web/ui *
-recursive-exclude  notebooks *

 recursive-include  lida/web/ui *
+recursive-exclude  notebooks *
+recursive-exclude  docs *
+recursive-exclude  tests *

lida/modules/executor.py CHANGED Viewed

@@ -41,6 +41,8 @@ def preprocess_code(code: str) -> str:
             code = code[index:]
     code = code.replace("```", "")
     return code

             code = code[index:]
     code = code.replace("```", "")
+    if "chart = plot(data)" not in code:
+        code = code + "\nchart = plot(data)"
     return code

lida/modules/manager.py CHANGED Viewed

@@ -219,6 +219,7 @@ class Manager(object):
         self,
         code,
         summary: Summary,
         textgen_config: TextGenerationConfig = TextGenerationConfig(),
         library: str = "seaborn",
     ):
@@ -237,6 +238,7 @@ class Manager(object):
         return self.recommender.generate(
             code=code,
             summary=summary,
             textgen_config=textgen_config,
             text_gen=self.text_gen,
             library=library,

         self,
         code,
         summary: Summary,
+        n=4,
         textgen_config: TextGenerationConfig = TextGenerationConfig(),
         library: str = "seaborn",
     ):
         return self.recommender.generate(
             code=code,
             summary=summary,
+            n=n,
             textgen_config=textgen_config,
             text_gen=self.text_gen,
             library=library,

lida/modules/scaffold.py CHANGED Viewed

@@ -17,7 +17,7 @@ class ChartScaffold(object):
         pass
     def get_template(self, goal: Goal, library: str):
-        mpl_pre = f"Set chart title to {goal.question}. If the solution requires a single value (e.g. max, min, median, first, last etc), ALWAYS add a line (axvline or axhline) to the chart, ALWAYS with a legend containing the single value (formatted with 0.2F). If using a <field> where semantic_type=date, YOU MUST APPLY the following transform before using that column i) convert date fields to date types using data[''] = pd.to_datetime(data[<field>], errors='coerce'), ALWAYS use  errors='coerce' ii) drop the rows with NaT values data = data[pd.notna(data[<field>])] iii) convert field to right time format for plotting.  ALWAYS make sure the x-axis labels are legible (e.g., rotate when needed). Use BaseMap for charts that require a map. Given the dataset summary, the plot(data) method should generate a {library} chart ({goal.visualization}) that addresses this goal: {goal.question}. The plot method must return a matplotlib object. Think step by step. \n"
         if library == "matplotlib":
             instructions = {"role": "assistant", "content": mpl_pre}

         pass
     def get_template(self, goal: Goal, library: str):
+        mpl_pre = f"Set chart title to {goal.question}. If the solution requires a single value (e.g. max, min, median, first, last etc), ALWAYS add a line (axvline or axhline) to the chart, ALWAYS with a legend containing the single value (formatted with 0.2F). If using a <field> where semantic_type=date, YOU MUST APPLY the following transform before using that column i) convert date fields to date types using data[''] = pd.to_datetime(data[<field>], errors='coerce'), ALWAYS use  errors='coerce' ii) drop the rows with NaT values data = data[pd.notna(data[<field>])] iii) convert field to right time format for plotting.  ALWAYS make sure the x-axis labels are legible (e.g., rotate when needed). Use BaseMap for charts that require a map. Given the dataset summary, the plot(data) method should generate a {library} chart ({goal.visualization}) that addresses this goal: {goal.question}. Do not include plt.show(). The plot method must return a matplotlib object. Think step by step. \n"
         if library == "matplotlib":
             instructions = {"role": "assistant", "content": mpl_pre}

lida/modules/viz/vizrecommender.py CHANGED Viewed

@@ -1,4 +1,6 @@
 from lida.modules.scaffold import ChartScaffold
 from llmx import TextGenerator, TextGenerationConfig, TextGenerationResponse
 # from lida.modules.scaffold import ChartScaffold
@@ -6,9 +8,16 @@ from lida.datamodel import Goal, Summary
 system_prompt = """
-You are a helpful assistant highly skilled in recommending a DIVERSE set of visualizations. Your input is an existing visualization, and  a summary of a dataset and an example visualization goal. Given this input, your task is to recommend an additional DIVERSE visualization that a user may be interesting to a user. Consider different types of valid aggregations, chart types, and use different variables from the data summary. THE CODE YOU GENERATE MUST BE CORRECT AND FOLLOW VISUALIZATION BEST PRACTICES. You MUST return a full program.  DO NOT include any preamble text. Do not include explanations or prose.
 """
 class VizRecommender(object):
     """Generate visualizations from prompt"""
@@ -22,6 +31,7 @@ class VizRecommender(object):
             self, code: str, summary: Summary,
             textgen_config: TextGenerationConfig,
             text_gen: TextGenerator,
             library='altair'):
         """Recommend a code spec based on existing visualization"""
@@ -36,12 +46,24 @@ class VizRecommender(object):
             {"role": "system", "content": f"The dataset summary is : {summary}"},
             {"role": "system",
              "content":
-             f"The original visualization code is: {code}.  You MUST use only the {library} library with the following instructions {library_instructions}. The resulting code MUST use the following template {library_template}."},
-            {"role": "user", "content": "Now write code for an additional visualizations that a user may be interested in given the goal and the dataset summary above."}
         ]
         textgen_config.messages = messages
-        completions: TextGenerationResponse = text_gen.generate(
             messages=messages, config=textgen_config)
-        return [x['content'] for x in completions.text]

+import logging
+import json
+from lida.utils import clean_code_snippet
 from lida.modules.scaffold import ChartScaffold
 from llmx import TextGenerator, TextGenerationConfig, TextGenerationResponse
 # from lida.modules.scaffold import ChartScaffold
 system_prompt = """
+You are a helpful assistant highly skilled in recommending a DIVERSE set of visualizations as code. Your input is an example visualization code,  a summary of a dataset and an example visualization goal. Given this input, your task is to recommend an additional DIVERSE visualizations that a user may be interesting to a user. Consider different types of valid aggregations, chart types, and use different variables from the data summary. THE CODE YOU GENERATE MUST BE CORRECT AND FOLLOW VISUALIZATION BEST PRACTICES.
+Your output MUST be perfect JSON in THE FORM OF A VALID JSON LIST without any additional explanation  e.g.,
+[{"code": "import ...", "index":0}, .. {"code": "import ...", "index":1} ]
 """
+# refactor this to return n predictions ...
+logger = logging.getLogger(__name__)
 class VizRecommender(object):
     """Generate visualizations from prompt"""
             self, code: str, summary: Summary,
             textgen_config: TextGenerationConfig,
             text_gen: TextGenerator,
+            n=3,
             library='altair'):
         """Recommend a code spec based on existing visualization"""
             {"role": "system", "content": f"The dataset summary is : {summary}"},
             {"role": "system",
              "content":
+             f"An example visualization code is: {code}. You MUST use only the {library} library with the following instructions {library_instructions}. Each recommended visualization CODE MUST use the following template {library_template}."},
+            {"role": "user", "content": f"Now write code for {n} visualizations in the JSON list format. YOU MUST RETURN ONLY A JSON LIST"}
         ]
         textgen_config.messages = messages
+        result: TextGenerationResponse = text_gen.generate(
             messages=messages, config=textgen_config)
+        try:
+            json_string = clean_code_snippet(result.text[0]["content"])
+            result = json.loads(json_string)
+            if isinstance(result, dict):
+                result = [result]
+            result = [x["code"] for x in result]
+        except json.decoder.JSONDecodeError:
+            logger.info(
+                f"Error decoding JSON for generated visualization recommendations: {result.text[0]['content']}")
+            print(
+                f"Error decoding JSON for generated visualization recommendations: {result.text[0]['content']}")
+            raise ValueError(
+                "The model did not return a valid JSON object while attempting generate visualization recommendations. Please try again.")
+        return result

notebooks/tutorial.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff