andrealiao commited on
Commit
edbfd50
Β·
1 Parent(s): 00d226e
Files changed (2) hide show
  1. README.md +1 -1
  2. examples/app.py +531 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: πŸ€–
4
  colorFrom: indigo
5
  colorTo: purple
6
  sdk: gradio
7
- app_file: app.py
8
  pinned: false
9
  ---
10
 
 
4
  colorFrom: indigo
5
  colorTo: purple
6
  sdk: gradio
7
+ app_file: examples/app.py
8
  pinned: false
9
  ---
10
 
examples/app.py CHANGED
@@ -0,0 +1,531 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """data_agent_demo.ipynb
3
+ Automatically generated by Colab.
4
+ Original file is located at
5
+ https://colab.research.google.com/drive/1DBkfSNSZIyONNTEgSILfCoOyAGrx13DY
6
+ # Introduction
7
+ NexDatawork is an AI data agent for data engineering and analytics without writing code.
8
+ ## Prerequisites
9
+ - langchain
10
+ - langgraph
11
+ - sqlalchemy
12
+ - pandas
13
+ - gradio
14
+ Before starting your work install all the required tools:
15
+ """
16
+
17
+ # Commented out IPython magic to ensure Python compatibility.
18
+ # Clean out any mixed installs first
19
+ # %pip uninstall -y langchain langchain-core langchain-community langchain-openai langchain-anthropic langchain-google-vertexai langchain-experimental langgraph langchain-scrapegraph
20
+
21
+ # Install a consistent, modern set
22
+ # %pip install -U \
23
+ # "langchain==0.3.*" \
24
+ # "langchain-core==0.3.*" \
25
+ # "langchain-community==0.3.*" \
26
+ # "langgraph>=0.2,<0.3" \
27
+ # "langchain-openai>=0.2.0" \
28
+ # "langchain-anthropic>=0.2.0" \
29
+ # "langchain-google-vertexai>=2.0.0" \
30
+ # "sqlalchemy>=2.0" \
31
+ # "pandas>=2.0" \
32
+ # "gradio>=4.0" \
33
+ # "langchain-experimental"\
34
+ # "langchain-scrapegraph"
35
+
36
+ import sys, importlib.util, importlib.metadata as md
37
+
38
def v(p):
    """Return the installed version of distribution *p*, or 'not installed'."""
    try:
        found = md.version(p)
    except md.PackageNotFoundError:
        # Distribution is absent from this environment.
        return "not installed"
    return found
43
+
44
+ print("Kernel Python:", sys.executable)
45
+ print("langchain:", v("langchain"))
46
+ print("langchain-core:", v("langchain-core"))
47
+ print("langchain-community:", v("langchain-community"))
48
+ print("langgraph:", v("langgraph"))
49
+ print("langchain-openai:", v("langchain-openai"))
50
+ print("langchain-anthropic:", v("langchain-anthropic"))
51
+ print("langchain-google-vertexai:", v("langchain-google-vertexai"))
52
+ print("langchain-experimental:", v("langchain-experimental"))
53
+ print("langchain-scrapegraph:", v("langchain-scrapegraph"))
54
+
55
+ print("langgraph importable?", importlib.util.find_spec("langgraph") is not None)
56
+
57
+ import os
58
+ import io
59
+ import contextlib
60
+ import pandas as pd
61
+ import gradio as gr
62
+ from IPython.display import Markdown, HTML, display
63
+
64
+ from sqlalchemy import (
65
+ Engine, create_engine, MetaData, Table, Column,
66
+ String, Integer, Float, insert, inspect, text
67
+ )
68
+
69
+ # LangChain 0.3.x import paths
70
+ from langchain_openai import AzureChatOpenAI
71
+ from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
72
+ from langchain.agents import initialize_agent
73
+ from langchain.agents.agent_types import AgentType
74
+ from langchain.tools import tool
75
+ from langchain_scrapegraph.tools import SmartScraperTool
76
+ from langchain.memory import ConversationTokenBufferMemory
77
+ from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
78
+ from langchain_community.agent_toolkits import SQLDatabaseToolkit
79
+ from langchain_community.utilities import SQLDatabase
80
+ from langchain_core.messages import HumanMessage
81
+
82
+ # LangGraph
83
+ from langgraph.prebuilt import create_react_agent
84
+
85
+
86
+
87
+ print("βœ… Imports OK")
88
+
89
+ """To access AzureOpenAI models you'll need to create an Azure account, create a deployment of an Azure OpenAI model, get the name and endpoint for your deployment, get an Azure OpenAI API key and install the langchain-openai integration package.
90
+ To access SmartScraperTool you will need a ScrapeGraphAI (SGAI) account and get an API key to launch the agent.
91
+ Replace the placeholders with the actual values.
92
+ """
93
+
94
+ os.environ["AZURE_OPENAI_ENDPOINT"] = "INSERT THE AZURE OPENAI ENDPOINT"
95
+ os.environ["AZURE_OPENAI_API_KEY"] = "INSERT YOUR AZURE OPENAI API KEY"
96
+ os.environ["SGAI_API_KEY"] = "INSERT YOUR SGAI API KEY"
97
+
98
+ """To set up the Azure OpenAI model choose the name for ```AZURE_DEPLOYMENT_NAME``` and insert ```AZURE_API_VERSION``` (the latest supported version can be found here: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference)."""
99
+
100
# Load your Azure environment variables
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_DEPLOYMENT_NAME = "gpt-4.1"  # 👈 Change if needed
AZURE_API_VERSION = "2025-01-01-preview"  # 👈 Use your correct version

# Define Azure LLM with streaming enabled.
# Every agent below (pandas, SQL, ETL, scraping) reuses this single `model`.
# The stdout callback prints tokens as they stream, which is what the
# io.StringIO capture in ask_agent/web_scraping later redirects into a trace.
model = AzureChatOpenAI(
    openai_api_version=AZURE_API_VERSION,
    azure_deployment=AZURE_DEPLOYMENT_NAME,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)
113
+
114
+ """The following block contains prompts that define the agents behaviour.
115
+ ```CSV_PROMPT_PREFIX``` is responsible for the data agent logic, i.e. steps that it takes to complete a task. The prefix can be modified to change analytical methodology, add specific data processing steps, implement a certain data validation technique and more.
116
+ ```CSV_PROMPT_SUFFIX``` defines the structure and the content of the agent's output. Suffix can be modified to change the report structure, add sections, include additional insights and so on.
117
+ ```system_message``` is for creating SQL queries. It specifies the behaviour of the agent, making it certify its results and restricting it from changing the database.
118
+ ```SCRAPING_PROMPT_PREFIX``` is responsible for the web scraping agent logic. It specifies how the agent should behave and defines its chain of thought when asked to find data online.
119
+ ```SCRAPING_PROMPT_SUFFIX``` is responsible for the output of the web scraping agent. It can be changed to set up the format of the output.
120
+ """
121
+
122
# Prompt prefix to set the tone for the agent.
# By specifying the prompt prefix you may make the results of the agent more
# specific and consistent. The following prompt can be substituted with an
# original one.
CSV_PROMPT_PREFIX = """
Set pandas to show all columns.
Get the column names and infer data types.
Then attempt to answer the question using multiple methods.
Please provide only the Python code required to perform the action, and nothing else.
"""

# Prompt suffix describes the output format.
# Modify this prompt to change the structure of the agent's answer.
# You can also add more sections so that the agent touches more aspects.
# The following prompt can be substituted with a personal one.
CSV_PROMPT_SUFFIX = """
- Try at least 2 different methods of calculation or filtering.
- Reflect: Do they give the same result?
- After performing all necessary actions and analysis with the dataframe, return the answer in clean **Markdown**, include summary table if needed.
- Include **Execution Recommendation** and **Web Insight** in the final Markdown.
- Always conclude the final Markdown with:
### Final Answer
Your conclusion here.
---
### Explanation
Mention specific columns you used.
Please provide only the Python code required to perform the action, and nothing else until the final Markdown output.
"""


# Prompt for creating SQL queries.
# By specifying the pipeline you can make the agent's results more consistent.
# NOTE: .format() fills {dialect} and {top_k} immediately, so this is a plain
# string by the time the SQL agent receives it.
system_message = """
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.
You can order the results by a relevant column to return the most interesting
examples in the database. Never query for all the columns from a specific table,
only ask for the relevant columns given the question.
You MUST double check your query before executing it. If you get an error while
executing a query, rewrite the query and try again.
DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
database.
To start you should ALWAYS look at the tables in the database to see what you
can query. Do NOT skip this step.
Then you should query the schema of the most relevant tables.
""".format(
    dialect="SQLite",
    top_k=5,
)

# Forces the SQL agent to close every reply with a recognizable marker;
# extract_code() later searches the output for '### Final answer'.
sql_suffix_prompt = '''
ALWAYS end your answer as follows:
### Final answer
Your query here
--
The answer here
'''

# Chain-of-thought instructions for the web-scraping agent: how to break the
# user's question into keyword/quantity pairs and search per group.
SCRAPING_PROMPT_PREFIX = '''
ROLE: Expert Data Scraper
MISSION: Extract precise online data using systematic keyword analysis
THINKING PROCESS:
1. Keyword Analysis: Identify primary entities (X, Y) and quantifiers (n, m)
2. Query Strategy: Formulate targeted search queries for each entity
3. Data Extraction: Scrape exact quantities specified
4. Validation: Verify results match request parameters
EXAMPLE:
User: "List first 5 startups and 3 investors in AI"
Keywords: ["startups:5", "investors:3", "AI"]
Action: Search "AI startups" → extract 5 instances → Search "AI investors" → extract 3 instances
WORKFLOW:
- Print identified keywords with quantities
- Execute sequential searches per keyword group
- Collect exactly specified instances
- Present structured results
READY FOR QUERY.
'''

# Output-format instructions for the web-scraping agent: results must be
# expressible as a flat pandas DataFrame.
SCRAPING_PROMPT_SUFFIX = '''
ROLE: Data Extraction Agent
MISSION: Structure all scraped data as valid pandas DataFrames
OUTPUT REQUIREMENTS:
- Format: pandas DataFrame
- Columns: 1-2 word descriptive names
- Content: Only strings or numerical values (no lists/dicts, no nested structures)
- Validation: Must pass pd.DataFrame access tests
VALIDATION CHECKLIST:
✓ Each column contains only strings or numerics
✓ No nested structures (lists/dicts) in cells
✓ Column names are descriptive and concise
✓ DataFrame is accessible via standard indexing
✓ All columns MUST BE OF THE SAME LENGTH
EXAMPLE OUTPUT:
```python
pd.DataFrame({
    'Company': ['Startup A', 'Startup B'],
    'Funding': [5000000, 7500000],
    'Industry': 'Artificial Intelligence'
})
'''
224
+
225
+ """The following block is responsible for the logic of the agent and the output that it produces.
226
+ ```ask_agent``` function concatenates the dataframes into one and starts an AI agent for working with the concatenated dataframes. It uses the prompts from the previous blocks for its logic.
227
+ """
228
+
229
+ # Replace this with your actual LLM setup
230
+ # Example:
231
+ # from langchain_openai import AzureChatOpenAI
232
+ # model = AzureChatOpenAI(...)
233
+
234
+
235
+ # --- Agent Logic ---
236
def ask_agent(files, question, history):
    """Concatenate the uploaded CSVs and answer *question* with a pandas agent.

    Returns a pair: (updated transcript for the trace panel, the raw answer).
    On failure the first element is an error message and the second is "".
    """
    # Merge every upload into a single frame; bail out with a readable
    # error if any file fails to parse.
    try:
        frames = [pd.read_csv(upload.name) for upload in files]
        combined = pd.concat(frames, ignore_index=True)
    except Exception as e:
        return f"❌ Could not read CSVs: {e}", ""

    try:
        # Agent that is allowed to run Python against the combined dataframe.
        pandas_agent = create_pandas_dataframe_agent(
            llm=model,                                       # the Azure LLM configured above
            df=combined,                                     # single concatenated dataframe
            verbose=True,                                    # verbose logging for debugging
            agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # ReAct, no few-shot examples
            allow_dangerous_code=True,                       # permits Python execution
            handle_parsing_errors=True,                      # recover from malformed LLM output
        )

        prompt = CSV_PROMPT_PREFIX + question + CSV_PROMPT_SUFFIX

        # Capture the streamed reasoning so it does not leak to the console.
        captured = io.StringIO()
        with contextlib.redirect_stdout(captured):
            answer = pandas_agent.invoke(prompt)["output"]
        trace = captured.getvalue()  # full reasoning text (kept for debugging)

        return history + answer, answer

    except Exception as e:
        return f"❌ Agent error: {e}", ""
267
+
268
+ """The block below deals with creating SQL code.
269
+ ```create_db``` creates a database where all the uploaded dataframes are stored for the data agent to work with.
270
+ ```start_llm``` starts a tool for working with SQL databases.
271
+ ```extract_code``` is used for extracting the SQL query from the agent's output.
272
+ ```sql_pipeline``` defines the pipeline, starting from creating a database with the uploaded dataframes, starting the agent for working with databases and creating the query according to the user's question.
273
+ """
274
+
275
# create_db receives the uploaded file handles and loads each CSV into a
# SQLite table named after the file.
def create_db(files):
    """Build a SQLite database (database.db) from the uploaded CSV files.

    Each file becomes a table whose name is the file's basename without
    extension; existing tables are replaced. Returns a SQLDatabase wrapper on
    success, or an error string on failure (callers must check the type).
    """
    print("="*10+"\nCREATE_DB\n"+"="*10)
    try:
        print("Attempting to create database...")
        engine = create_engine("sqlite:///database.db")
        dataframes = {}
        print("="*10+f"CREATE_DB:\nfiles:{[f.name for f in files]}\n"+"="*10)
        for f in files:
            # Table name = upload's basename without the .csv extension.
            table_name = os.path.splitext(os.path.basename(f.name))[0]
            dataframes[table_name] = pd.read_csv(f.name)
        with engine.begin() as connection:
            # Idiom fix: iterate .items() instead of zip(keys(), values()).
            for name, table in dataframes.items():
                table.to_sql(name, connection, if_exists="replace", index=False)

        db = SQLDatabase.from_uri("sqlite:///database.db")
        print("DATABASE database.db CREATED")
    except Exception as e:
        return f"Database error: {e}"
    return db
295
+
296
# Initialization of an LLM model for SQL queries.
def start_llm(database):
    """Build the SQL toolkit for *database* and return (model, tools).

    On failure returns an error string instead of the pair, so callers must
    check what they got back.
    """
    try:
        print("="*10+"\nSTART_LLM\n"+"="*10)
        # Toolkit bundling the standard LangChain SQL tools for this database.
        sql_toolkit = SQLDatabaseToolkit(db=database, llm=model)
        sql_tools = sql_toolkit.get_tools()
    except Exception as e:
        return f"Couldn't retrieve SQLDatabaseToolkit: {e}"
    print("\nSQLDatabaseToolkit CREATED\n")
    return model, sql_tools
306
+
307
+
308
def extract_code(text):
    """Return the '### Final answer' section of the SQL agent's output.

    The agent is instructed (sql_suffix_prompt) to end every reply with a
    '### Final answer' marker; everything from that marker onward is the
    answer shown to the user.

    Fixes vs. the original:
    - the parameter no longer shadows the imported HumanMessage class;
    - when the marker is absent, str.find returns -1 and the old slice
      text[-1:] returned only the last character — now the full text is
      returned instead;
    - dead locals (FRONT_INDENT, p2) removed, and the error path returns the
      input rather than an implicit None.
    """
    print("="*10+"\nEXTRACT_CODE\n"+"="*10)
    marker = '### Final answer'
    pos = text.find(marker)
    if pos == -1:
        # Marker missing: fall back to the whole answer.
        return text
    print(pos, text[pos:pos+50])
    return text[pos:]
319
+
320
+
321
# Receives dataframes, puts them in a database and uses an AI agent to create
# queries based on the user's question.
def sql_pipeline(tables, question, history):
    """Load the uploaded tables into SQLite and answer *question* via a SQL agent.

    Returns (text for the trace panel, new history). The Gradio click handler
    wires exactly two outputs, so EVERY return path yields a 2-tuple — the
    original error paths returned a bare string, which broke the UI wiring.
    """
    print("="*10+"\nSQL_PIPELINE\n"+"="*10)
    db = create_db(tables)  # uploads the user's files into database.db
    if isinstance(db, str):
        # create_db signals failure by returning an error string.
        return db, history

    if not os.path.exists("database.db"):
        print("Database doesn't exist")
        return "Database doesn't exist", history

    started = start_llm(db)  # (model, tools) on success, error string on failure
    if isinstance(started, str):
        return started, history
    llm, tools = started

    try:
        agent_executor = create_react_agent(llm, tools, prompt=system_message+sql_suffix_prompt)
        output = ""
        # Stream the agent's steps, accumulating the message contents.
        for step in agent_executor.stream(
            {"messages": [{"role": "user", "content": question}]},
            stream_mode="values",
        ):
            output += step["messages"][-1].content
        # Guard against extract_code yielding None on an internal error.
        final_answer = extract_code(output) or ""
        return history + final_answer, final_answer
    except Exception as e:
        return f"SQL agent error: {e}", history
343
+
344
+ """THe following block is responsible for creating a smart ETL pipeline"""
345
+
346
@tool
def preview_data(table: str) -> str:
    "Reads and reviews a table"
    # *table* is a CSV path supplied by the agent.
    df = pd.read_csv(table)
    # Fix: the tool declares a str return, but df.head() is a DataFrame;
    # serialize it so the agent receives plain text.
    return df.head().to_string()
351
+
352
@tool
def suggest_transformation(column_summary: str) -> str:
    "Suggests transformation based on column summary"
    # Ask the LLM for short, line-separated ETL suggestions.
    prompt = f"""
You are a data engineer assistant. Based on the following column summary, suggest simple, short ETL transformation steps.
Output format: each suggestion on a new line, without explanations or markdown.
Example:
Remove $ from revenue and cast to float
Column summary:
{column_summary}
"""
    # Fix: ChatModel.predict is deprecated in LangChain 0.3 (which this file
    # pins); invoke() returns an AIMessage whose .content holds the text.
    return model.invoke(prompt).content.strip()
364
+
365
@tool
def generate_python_code(transform_description: str) -> str:
    "Generate pandas code from the transformation description"
    # Ask the LLM to translate the suggestions into runnable pandas code.
    prompt = f"""
You are a data engineer. Write pandas code to apply the following ETL transformation to a dataframe called 'df'.
Transformations:
{transform_description}
Only return pandas code. No explanation, no markdown.
"""
    # Fix: ChatModel.predict is deprecated in LangChain 0.3 (which this file
    # pins); invoke() returns an AIMessage whose .content holds the text.
    return model.invoke(prompt).content.strip()
375
+
376
# model is the LLM driving the ETL agent; *dataframe* is the name of the table
# pushed through the ETL process.
def etl_pipeline(dataframe, history):
    """Preview, clean, and regenerate code for the table named *dataframe*.

    Returns (updated transcript, generated code). The agent previews the
    table, suggests transformations, and emits pandas code in one run.
    """
    tools = [preview_data, suggest_transformation, generate_python_code]

    # Fix: handle_parsing_errors belongs in the agent configuration — passing
    # it inside the run() payload (as before) was silently ignored.
    agent = initialize_agent(
        tools,
        model,
        agent='zero-shot-react-description',
        verbose=True,
        handle_parsing_errors=True,
    )

    # Fix: removed a stray ']' that leaked into the prompt right after the
    # Cleaned_<table> name.
    input_prompt = f"""
    Preview the table {dataframe} and \
    generate Python code to read the table, clean it, and finally write the \
    dataframe into a table called {'Cleaned_'+dataframe}. \
    Do not stop the Python session
    """

    # Preview + suggest + generate code in a single run.
    response = agent.run({
        "input": input_prompt,
        "chat_history": [],
    })

    print("Generated Python Code:\n")
    print(response)
    # Strip markdown code fencing so only the raw pandas code remains.
    cleaned = response.strip('`').replace('python', '')
    return history + cleaned, cleaned
401
+
402
+ """The following code is responsible for AI web scraping agent"""
403
+
404
def web_scraping(question, history):
    """Answer *question* by scraping the web with the ScrapeGraph agent.

    Returns (text for the trace panel, new history). The Gradio click handler
    wires exactly two outputs, so the error path now returns a 2-tuple —
    the original returned three values there, which broke the UI on failure.
    """
    try:
        tools = [
            SmartScraperTool(),  # ScrapeGraphAI-backed scraper (needs SGAI_API_KEY)
        ]

        agent = initialize_agent(
            tools=tools,
            llm=model,
            agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True
        )

        buffer = io.StringIO()

        print(SCRAPING_PROMPT_PREFIX + question + SCRAPING_PROMPT_SUFFIX)
        # Capture the agent's verbose reasoning instead of printing it.
        with contextlib.redirect_stdout(buffer):
            response = agent.run(SCRAPING_PROMPT_PREFIX + question + SCRAPING_PROMPT_SUFFIX)
        trace = buffer.getvalue()  # agent trace (kept for debugging)
        return history + response, response
    except Exception as e:
        return f'Web scraping error {e}', history
426
+
427
+ """The next section creates a web interface using Gradio, providing a user-friendly way to analyze data and create SQL queries.
428
+ ```
429
+ with gr.Blocks(
430
+ css='''
431
+ Change the code here to modify the styling of the UI
432
+ '''
433
+ ) as demo:
434
+ ```
435
+ **Display Area**:
436
+ - `result_display`: Markdown report output
437
+ - `trace_display`: Agent reasoning trace
438
+ **Input Section**:
439
+ - `file_input`: Multiple CSV upload
440
+ - `question_input`: User query box
441
+ **Action Buttons**:
442
+ - `sql_button`: Generate SQL queries β†’ `sql_pipeline` function
443
+ - `ask_button`: Run analysis β†’ `ask_agent` function
444
+ **Styling**
445
+ - Light theme with rounded corners
446
+ - Custom CSS for professional appearance
447
+ **Launch**
448
+ `demo.launch(share=True,debug=False)` - Public access enabled, debugging disabled
449
+ For debugging use `debug=True` in order to see the messages in the console.
450
+ """
451
+
452
# --- Gradio UI ---
# Light-theme front end. Three actions share one trace panel and one history:
#   "💡 Analyze"              -> ask_agent     (pandas dataframe agent)
#   "Create Query"            -> sql_pipeline  (SQL agent over database.db)
#   "Find the answer online"  -> web_scraping  (ScrapeGraph agent)
# Edit the css string below to restyle the app.
with gr.Blocks(
    css="""
    body, .gradio-container {
        background: #ffffff !important;
        color: #1f2937 !important;
        font-family: 'Segoe UI', sans-serif;
    }
    #title {
        color: #1f2937 !important;
        font-size: 2rem;
        font-weight: 600;
        text-align: center;
        padding-top: 20px;
        padding-bottom: 10px;
    }
    .gr-box, .gr-input, .gr-output, .gr-markdown, .gr-textbox, .gr-file, textarea, input {
        background: rgba(0, 0, 0, 0.04) !important;
        border: 1px solid rgba(0, 0, 0, 0.1);
        border-radius: 12px !important;
        color: #1f2937 !important;
    }
    .trace-markdown {
        height: 400px !important;
        overflow-y: scroll;
        resize: none;
    }
    textarea::placeholder, input::placeholder {
        color: rgba(31, 41, 55, 0.6) !important;
    }
    button {
        background: rgba(0, 0, 0, 0.07) !important;
        color: #1f2937 !important;
        border: 1px solid rgba(0, 0, 0, 0.15) !important;
        border-radius: 8px !important;
    }
    button:hover {
        background: rgba(0, 0, 0, 0.15) !important;
    }
    """
) as demo:

    gr.Markdown("<h2 id='title'>📊 NexDatawork Data Agent</h2>")

    with gr.Column():

        # Markdown report output produced by the agents.
        result_display = gr.Markdown(label="📌 Report Output (Markdown)")

        with gr.Row():

            # Scrollable reasoning trace — the "explainable agent" panel.
            trace_display = gr.Markdown(label="🛠️ Data Agent Reasoning - Your Explainable Agent", elem_classes=["trace-markdown"])

            sql_display = gr.Markdown(label='SQL Process')


        with gr.Row(equal_height=True):

            # Multiple CSVs may be uploaded; they are concatenated or loaded
            # into SQLite depending on which button is clicked.
            file_input = gr.File(label="📁 Upload CSV(s)", file_types=[".csv"], file_count="multiple",height=120)

            question_input = gr.Textbox(label="💬 Ask Your Agent",placeholder="e.g., What is the trend for revenue over time?",lines=2)

        with gr.Row():

            ask_button = gr.Button("💡 Analyze")

        with gr.Row():

            sql_button = gr.Button('Create Query')

            scraping_button = gr.Button('Find the answer online')

    # Running transcript shared by all three handlers; each handler returns
    # (text for trace_display, new history).
    history = gr.State(value="")

    sql_button.click(fn=sql_pipeline,inputs=[file_input,question_input,history],outputs = [trace_display,history])

    scraping_button.click(fn=web_scraping,inputs=[question_input,history],outputs = [trace_display,history])

    ask_button.click(fn=ask_agent,inputs=[file_input, question_input,history],outputs=[trace_display,history])

# share=True publishes a public link; set debug=True to see console messages.
demo.launch(share=True,debug=False)