DanilaKopitayko committed on
Commit
bb78df3
·
1 Parent(s): 7765653

ETL pipeline added

Browse files

The ETL pipeline now appears after pressing the Analyze button. Some stylistic changes to the README.

Files changed (2) hide show
  1. README.md +6 -3
  2. examples/data_agent_demo.ipynb +36 -25
README.md CHANGED
@@ -15,10 +15,13 @@
15
  </h2>
16
 
17
  <div align='center'>
18
- <a href="https://github.com/NexDatawork/data-agents/pulls"><img alt = "pull requests" src = "https://img.shields.io/github/issues-pr-closed/NexDatawork/data-agents?label=pull%20requests&labelColor=blue"/></a>
19
- <a href="https://github.com/NexDatawork/data-agents/blob/main/LICENSE"><img alt = "LICENSE" src = "https://img.shields.io/badge/license-blue"/></a>
20
- <a href = "https://discord.gg/Tb55tT5UtZ"><img src="https://img.shields.io/badge/Discord-Join%20Community-7289DA?logo=discord&logoColor=white" alt="Discord"></a>
21
  <a href="https://github.com/NexDatawork/data-agents/stargazers"><img src="https://img.shields.io/github/stars/NexDatawork/data-agents?style=social" alt="GitHub Stars"></a>
 
 
 
22
 
23
 
24
  </div>
 
15
  </h2>
16
 
17
  <div align='center'>
18
+ <a href="https://github.com/NexDatawork/data-agents/pulls"><img alt = "pull requests" src = "https://img.shields.io/github/issues-pr-closed/NexDatawork/data-agents?label=pull%20requests&labelColor=rgba(56, 52, 182, 1)&color=rgb(90, 42, 184)"/></a>
19
+ <a href="https://github.com/NexDatawork/data-agents/blob/main/LICENSE"><img alt = "LICENSE" src = "https://img.shields.io/badge/license-Apache%202.0-blueviolet?style=flat&color=rgb(90, 42, 184)&labelColor=rgba(56, 52, 182, 1)"/></a>
20
+ <a href = "https://discord.gg/Tb55tT5UtZ"><img src="https://img.shields.io/badge/Discord-Join%20Community-7289DA?logo=discord&logoColor=white&color=rgb(90, 42, 184)&labelColor=rgba(56, 52, 182, 1)" alt="Discord"></a>
21
  <a href="https://github.com/NexDatawork/data-agents/stargazers"><img src="https://img.shields.io/github/stars/NexDatawork/data-agents?style=social" alt="GitHub Stars"></a>
22
+ <a href="https://huggingface.co/NexDatawork">
23
+ <img alt="Hugging Face" src="https://img.shields.io/badge/Hugging%20Face-Models%20%26%20Datasets?logo=huggingface&color=rgb(90, 42, 184)">
24
+ </a>
25
 
26
 
27
  </div>
examples/data_agent_demo.ipynb CHANGED
@@ -561,29 +561,39 @@
561
  "\n",
562
  "#llm is the agent that creates the etl pipeline\n",
563
  "#dataframe is a string with the name of the dataframe push through the etl process\n",
564
- "def etl_pipeline(dataframe,history):\n",
565
- " tools = [preview_data, suggest_transformation, generate_python_code]\n",
566
- "\n",
567
- " agent = initialize_agent(tools, model, agent='zero-shot-react-description',verbose=True)\n",
568
- "\n",
569
- " input_prompt = f\"\"\"\n",
570
- " Preview the table {dataframe} and \\\n",
571
- " generate Python code to read the table, clean it, and finally write the \\\n",
572
- " dataframe into a table called {'Cleaned_'+dataframe}]. \\\n",
573
- " Do not stop the Python session\n",
574
- " \"\"\"\n",
575
- "\n",
576
- " # Preview + suggest + generate code in a single run\n",
577
- " response = agent.run({\n",
578
- " \"input\": input_prompt,\n",
579
- " \"chat_history\": [],\n",
580
- " \"handle_parsing_errors\": True\n",
581
- " })\n",
582
- "\n",
583
- " print(\"Generated Python Code:\\n\")\n",
584
- " print(response)\n",
585
- " response2 = response.strip('`').replace('python', '')\n",
586
- " return history + response2, response2\n"
 
 
 
 
 
 
 
 
 
 
587
  ],
588
  "metadata": {
589
  "id": "n6sfgj8W4eAA"
@@ -730,13 +740,13 @@
730
  "\n",
731
  " with gr.Column():\n",
732
  "\n",
733
- " result_display = gr.Markdown(label=\"📌 Report Output (Markdown)\")\n",
734
  "\n",
735
  " with gr.Row():\n",
736
  "\n",
737
  " trace_display = gr.Markdown(label=\"🛠️ Data Agent Reasoning - Your Explainable Agent\", elem_classes=[\"trace-markdown\"])\n",
738
  "\n",
739
- " sql_display = gr.Markdown(label='SQL Process')\n",
740
  "\n",
741
  "\n",
742
  " with gr.Row(equal_height=True):\n",
@@ -762,6 +772,7 @@
762
  " scraping_button.click(fn=web_scraping,inputs=[question_input,history],outputs = [trace_display,history])\n",
763
  "\n",
764
  " ask_button.click(fn=ask_agent,inputs=[file_input, question_input,history],outputs=[trace_display,history])\n",
 
765
  "\n",
766
  "demo.launch(share=True,debug=False)"
767
  ],
 
561
  "\n",
562
  "#llm is the agent that creates the etl pipeline\n",
563
  "#dataframe is a string with the name of the dataframe push through the etl process\n",
564
+ "def etl_pipeline(dataframe,history=\"\"):\n",
565
+ " print(\"=\"*10 + \"\\nETL_PIPELINE\\n\"+\"=\"*10)\n",
566
+ "\n",
567
+ " try:\n",
568
+ "\n",
569
+ " table_name = dataframe[0]\n",
570
+ "\n",
571
+ "\n",
572
+ "\n",
573
+ " tools = [preview_data, suggest_transformation, generate_python_code]\n",
574
+ "\n",
575
+ " agent = initialize_agent(tools, model, agent='zero-shot-react-description',verbose=True)\n",
576
+ "\n",
577
+ " input_prompt = f\"\"\"\n",
578
+ " Preview the table {table_name} and \\\n",
579
+ " generate Python code to read the table, clean it, and finally write the \\\n",
580
+ " dataframe into a table called {'Cleaned_'+table_name}]. \\\n",
581
+ " Do not stop the Python session\n",
582
+ " \"\"\"\n",
583
+ "\n",
584
+ " # Preview + suggest + generate code in a single run\n",
585
+ " response = agent.run({\n",
586
+ " \"input\": input_prompt,\n",
587
+ " \"chat_history\": [],\n",
588
+ " \"handle_parsing_errors\": True\n",
589
+ " })\n",
590
+ "\n",
591
+ " print(\"Generated Python Code:\\n\")\n",
592
+ " print(response)\n",
593
+ " response2 = response.strip('`').replace('python', '')\n",
594
+ " return history + response2, response2\n",
595
+ " except Exception as e:\n",
596
+ " return f'ETL pipeline error: {e}',f'ETL pipeline error: {e}'\n"
597
  ],
598
  "metadata": {
599
  "id": "n6sfgj8W4eAA"
 
740
  "\n",
741
  " with gr.Column():\n",
742
  "\n",
743
+ " #result_display = gr.Markdown(label=\"📌 Report Output (Markdown)\")\n",
744
  "\n",
745
  " with gr.Row():\n",
746
  "\n",
747
  " trace_display = gr.Markdown(label=\"🛠️ Data Agent Reasoning - Your Explainable Agent\", elem_classes=[\"trace-markdown\"])\n",
748
  "\n",
749
+ " etl_display = gr.Markdown(label='ETL Process',elem_classes=[\"trace-markdown\"])\n",
750
  "\n",
751
  "\n",
752
  " with gr.Row(equal_height=True):\n",
 
772
  " scraping_button.click(fn=web_scraping,inputs=[question_input,history],outputs = [trace_display,history])\n",
773
  "\n",
774
  " ask_button.click(fn=ask_agent,inputs=[file_input, question_input,history],outputs=[trace_display,history])\n",
775
+ " ask_button.click(fn=etl_pipeline,inputs=[file_input ,history],outputs=[etl_display,history])\n",
776
  "\n",
777
  "demo.launch(share=True,debug=False)"
778
  ],