Files changed (8) hide show
  1. .env +1 -1
  2. Data_Geneartion_Agent.py +303 -318
  3. Dockerfile +19 -19
  4. README.md +112 -120
  5. main.py +83 -88
  6. requirements.txt +11 -12
  7. runtime_env.json +0 -1
  8. templates/index.html +0 -495
.env CHANGED
@@ -1 +1 @@
1
- GOOGLE_API_KEY=<redacted-previous-key>
 
1
+ GOOGLE_API_KEY=<redacted-current-key>
Data_Geneartion_Agent.py CHANGED
@@ -1,318 +1,303 @@
1
- import pandas as pd
2
- import numpy as np
3
- from langchain_core.prompts import ChatPromptTemplate
4
- from langchain.agents import AgentExecutor, create_react_agent , BaseMultiActionAgent , initialize_agent, AgentType , create_openai_tools_agent , create_openai_functions_agent , create_tool_calling_agent
5
- from langchain.tools import Tool
6
- from langchain_google_genai import ChatGoogleGenerativeAI
7
- import json
8
- from dotenv import load_dotenv
9
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
- from langchain.schema.agent import AgentActionMessageLog
11
- from langchain.agents.agent import AgentAction
12
- from langchain.chains import LLMChain
13
- from langchain_core.runnables import Runnable
14
- load_dotenv()
15
- import os
16
-
17
- # os.makedirs("/tmp/", exist_ok=True)
18
-
19
-
20
- # openai_api_key = os.getenv("OPENAI_API_KEY")
21
-
22
-
23
-
24
- def load_api_key():
25
- try:
26
- with open("runtime_env.json", "r") as f:
27
- env = json.load(f)
28
- return env.get("API_KEY")
29
- except Exception:
30
- return None
31
-
32
- GOOGLE_API_KEY = load_api_key()
33
- print(f"Here is api_key {GOOGLE_API_KEY}")
34
-
35
-
36
-
37
- system_prompt = """
38
- You are a Synthetic Data Generation Agent responsible for producing structured conversational data suitable for fine-tuning a language model.
39
-
40
- Your task follows this pipeline:
41
- 1. **Understand the user's request** to determine the data domain and format.
42
- 2. **Generate a diverse list of realistic user instructions** related to the request topic using the generate_data_tool.
43
- 3. **Create corresponding assistant responses** using the generate_response_tool.
44
- 4. **IMPORTANT**: After generating both instructions and responses, you MUST use the csv_tool to save the data.
45
- 5. **Return the output in JSON format** using two keys only:
46
- - `"instructions"`: An array of user queries or prompts.
47
- - `"response"`: An array of assistant replies corresponding to each instruction.
48
-
49
- ### Output format:
50
- Return the final output in this JSON format not any other text.:
51
- ```json
52
- {{
53
- "instructions": ["<user prompt 1>", "<user prompt 2>", "..."],
54
- "response": ["<assistant response 1>", "<assistant response 2>", "..."]
55
- }}
56
- ### Available Tools:
57
-
58
- 1. **generate_data_tool**
59
- - Use when: You need to create initial instructions for data generation
60
- - Purpose: Generates structured instructions based on user input
61
- - Input: User's query about what kind of data they want
62
- - Output: JSON with "instructions" key
63
-
64
- 2. **generate_response_tool**
65
- - Use when: You have instructions and need to generate corresponding responses
66
- - Purpose: Creates appropriate responses for the given instructions
67
- - Input: Instructions from generate_data_tool
68
- - Output: JSON with "response" key
69
-
70
- 3. **csv_tool**
71
- - Use when: You have complete JSON data ready to be saved
72
- - Purpose: Converts JSON data to CSV format and saves it
73
- - Input: Complete JSON data with both instructions and responses
74
- - Output: Saves data to "Data_File.csv"
75
-
76
- ### Tool Usage Flow:
77
- 1. First, use generate_data_tool to create instructions
78
- 2. Then, use generate_response_tool to create corresponding responses
79
- 3. Finally, you MUST use csv_tool to save the complete dataset
80
-
81
- Remember to always maintain the correct JSON structure throughout the process.
82
-
83
- IMPORTANT: Only provide JSON output without any additional text before or after the JSON structure except of 'Json'. Do not include any explanatory text, markdown formatting, or other content outside the JSON object.
84
- If user dose not mention the number of rows then default to 10 rows.
85
- """
86
-
87
-
88
- query_system_prompt = """You are a **Data Generation Agent** that produces **natural language instructions** to guide the creation of fine-tuning datasets based on a user request.
89
-
90
- ### Your Task:
91
- 1. Understand the user's input and determine the type and topic of data required.
92
- 2. Based on the input, generate a **single, clear instruction** for creating a dataset. The instruction should describe what kind of data to generate, in natural and concise language.
93
- 3. If the number of rows is not explicitly mentioned, default to **10 rows**.
94
- 4. **Only return a string** with one key: `"instructions"`.
95
-
96
- ---
97
-
98
- ### Output Format:
99
-
100
- "instructions": "Generate 1000 rows of employee salary data based on..."
101
-
102
-
103
- """
104
- response_system_prompt = """
105
- You are a **Data Generation Agent** responsible for generating **structured data responses** based on the given instructions.
106
-
107
- ---
108
-
109
- ### Your Task:
110
- 1. Read and understand the provided **instructions**.
111
- 2. Generate the appropriate **data or description** that directly fulfills the instructions.
112
- 3. Return only a **JSON string** with one key: `"response"`.
113
-
114
- ---
115
-
116
- ### Output Format:
117
-
118
- "response": "Here is the data..."
119
-
120
-
121
- """
122
-
123
-
124
-
125
- def generate_data(query : str) -> str:
126
- try:
127
- query_llm = ChatGoogleGenerativeAI(
128
- model="gemini-2.0-flash ", # gemini-2.0-flash or gemini-2.5-flash-preview-04-17
129
- api_key=GOOGLE_API_KEY,
130
- temperature=0.9,
131
- )
132
- query_prompt = ChatPromptTemplate.from_messages([
133
- ("system", query_system_prompt),
134
- ("human", "{input}"),
135
- ("assistant", "{agent_scratchpad}"),
136
- ])
137
- chain: Runnable = query_prompt | query_llm
138
- result = chain.invoke({"input": query, "agent_scratchpad": ""})
139
- return result
140
- except Exception as e:
141
- print(f"Error in generate_data: {str(e)}")
142
- raise
143
-
144
- def generate_response(instructions : str) -> str:
145
- try:
146
- response_llm = ChatGoogleGenerativeAI(
147
- model="gemini-2.0-flash",
148
- api_key=GOOGLE_API_KEY,
149
- temperature=0.9,
150
- )
151
- response_prompt = ChatPromptTemplate.from_messages([
152
- ("system", response_system_prompt),
153
- ("human", "{instructions}"),
154
- ("assistant", "{agent_scratchpad}"),
155
- ])
156
- chain: Runnable = response_prompt | response_llm
157
- result = chain.invoke({"instructions": instructions, "agent_scratchpad": ""})
158
- return result
159
- except Exception as e:
160
- print(f"Error in generate_response: {str(e)}")
161
- raise
162
-
163
- def save_to_csv(data: str):
164
- try:
165
- # Clean the input string by removing triple quotes
166
-
167
- print(f"here is the data {data}")
168
- # Parse the JSON string into a Python dictionary
169
- if ('json') in data:
170
- print(f"json is in data")
171
- if "```" in data:
172
- print(f"``` is in data")
173
- data = data.replace("json", "").strip()
174
- data = data.replace("```", "").strip()
175
- data_dict = json.loads(data)
176
- df = pd.DataFrame()
177
- df['instructions'] = data_dict['instructions']
178
- df['response'] = data_dict['response']
179
-
180
- print(f"DataFrame shape: {df.shape}")
181
- print(f"DataFrame columns: {df.columns.tolist()}")
182
- print(f"here is df {df}")
183
- # Save to CSV without index
184
- print("\nSaving to CSV...")
185
- output_path = "/tmp/Data_File.csv"
186
- df.to_csv(output_path, index=False)
187
- return f"File scucessfully created"
188
- else:
189
- print(f"``` is not in data")
190
- data = data.replace("json", "").strip()
191
- print(f"data is {data}")
192
- data_dict = json.loads(data)
193
- print(f"data_dict is {data_dict}")
194
- df = pd.DataFrame()
195
- df['instructions'] = data_dict['instructions']
196
- df['response'] = data_dict['response']
197
-
198
- print(f"DataFrame shape: {df.shape}")
199
- print(f"DataFrame columns: {df.columns.tolist()}")
200
- print(f"here is df {df}")
201
- # Save to CSV without index
202
- print("\nSaving to CSV...")
203
- output_path = "/tmp/Data_File.csv"
204
- df.to_csv(output_path, index=False)
205
- return f"File scucessfully created"
206
- # Convert the dictionary to a DataFrame
207
- elif "```" in data:
208
- print(f"``` is in data")
209
- data = data.replace("```", "").strip()
210
- data = json.loads(data)
211
- df = pd.DataFrame()
212
- df['instructions'] = data['instructions']
213
- df['response'] = data['response']
214
-
215
- # Save to CSV without index
216
- print("\nSaving to CSV...")
217
- output_path = "/tmp/Data_File.csv"
218
- df.to_csv(output_path, index=False)
219
- return f"File created successfully"
220
- else:
221
- print(f"''' is not in data")
222
- data = json.loads(data)
223
- df = pd.DataFrame()
224
- df['instructions'] = data['instructions']
225
- df['response'] = data['response']
226
-
227
- # Save to CSV without index
228
- print("\nSaving to CSV...")
229
- output_path = "/tmp/Data_File.csv"
230
- df.to_csv(output_path, index=False)
231
- return f"File created successfully"
232
-
233
-
234
- except json.JSONDecodeError as e:
235
- raise ValueError(f"Invalid JSON data: {str(e)}")
236
-
237
-
238
-
239
- generate_data_tool = Tool(
240
- name="generate_data_tool",
241
- description="Generate the data(Instructions) for the query",
242
- func=generate_data
243
- )
244
- generate_response_tool = Tool(
245
- name="generate_response_tool",
246
- description="Generate the data(Response) for the instructions",
247
- func=generate_response
248
- )
249
-
250
-
251
- csv_tool = Tool(
252
- name="csv_tool",
253
- description="Pass the JSON data after generating both instructions and responses, convert it into csv then save the csv file",
254
- func=save_to_csv
255
- )
256
-
257
- tools = [generate_data_tool, generate_response_tool,csv_tool]
258
-
259
-
260
-
261
-
262
- query_prompt = ChatPromptTemplate.from_messages([
263
- ("system", system_prompt),
264
- ("human", "{input}"),
265
- MessagesPlaceholder(variable_name="agent_scratchpad")
266
- ])
267
-
268
- llm = ChatGoogleGenerativeAI(
269
- model="gemini-2.0-flash",
270
- api_key=GOOGLE_API_KEY,
271
- temperature=0.9,
272
- )
273
-
274
- agent = create_openai_tools_agent(llm=llm, prompt=query_prompt, tools=tools)
275
- data_agent = AgentExecutor(agent=agent, tools=tools, verbose=True)
276
-
277
-
278
-
279
- def generate_data_agent(query: str):
280
- try:
281
- GOOGLE_API_KEY = load_api_key()
282
- print(f"Here is api_key after puting {GOOGLE_API_KEY}")
283
- if not query:
284
- return {"status": "error", "message": "Query cannot be empty", "csv_file": None}
285
-
286
- print(f"Processing query: {query}")
287
- result = data_agent.invoke({"input": query})
288
- print(f"Agent execution result: {result['output']}")
289
- save_to_csv(result['output'])
290
- print(f"here is the result {type(result['output'])}")
291
- # Check if data.csv was created
292
- if os.path.exists("tmp/Data_File.csv"):
293
- return {
294
- "status": "success",
295
- "message": "Created successfully! You can download the CSV file below.",
296
- "csv_file": "Data_File.csv"
297
- }
298
- else:
299
- return {
300
- "status": "error",
301
- "message": "Failed to generate data file",
302
- "csv_file": None
303
- }
304
- except Exception as e:
305
- print(f"Error in generate_data_agent: {str(e)}")
306
- return {
307
- "status": "error",
308
- "message": f"An error occurred: {str(e)}",
309
- "csv_file": None
310
- }
311
-
312
-
313
-
314
-
315
-
316
-
317
-
318
-
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from langchain_openai import ChatOpenAI
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+ from langchain.agents import AgentExecutor, create_react_agent , BaseMultiActionAgent , initialize_agent, AgentType , create_openai_tools_agent , create_openai_functions_agent , create_tool_calling_agent
7
+ from langchain.tools import Tool
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ import json
10
+ from dotenv import load_dotenv
11
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
12
+ from langchain.schema.agent import AgentActionMessageLog
13
+ from langchain.agents.agent import AgentAction
14
+ from langchain.chains import LLMChain
15
+ from langchain_nvidia_ai_endpoints import ChatNVIDIA
16
+ from langchain_core.runnables import Runnable
17
+ load_dotenv()
18
+ import os
19
+
20
+
21
+ openai_api_key = os.getenv("OPENAI_API_KEY")
22
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
23
+
24
+
25
+ system_prompt = """
26
+ You are a Synthetic Data Generation Agent responsible for producing structured conversational data suitable for fine-tuning a language model.
27
+
28
+ Your task follows this pipeline:
29
+ 1. **Understand the user's request** to determine the data domain and format.
30
+ 2. **Generate a diverse list of realistic user instructions** related to the request topic using the generate_data_tool.
31
+ 3. **Create corresponding assistant responses** using the generate_response_tool.
32
+ 4. **IMPORTANT**: After generating both instructions and responses, you MUST use the csv_tool to save the data.
33
+ 5. **Return the output in JSON format** using two keys only:
34
+ - `"instructions"`: An array of user queries or prompts.
35
+ - `"response"`: An array of assistant replies corresponding to each instruction.
36
+
37
+ ### Output format:
38
+ Return the final output in this JSON format not any other text.:
39
+ ```json
40
+ {{
41
+ "instructions": ["<user prompt 1>", "<user prompt 2>", "..."],
42
+ "response": ["<assistant response 1>", "<assistant response 2>", "..."]
43
+ }}
44
+ ### Available Tools:
45
+
46
+ 1. **generate_data_tool**
47
+ - Use when: You need to create initial instructions for data generation
48
+ - Purpose: Generates structured instructions based on user input
49
+ - Input: User's query about what kind of data they want
50
+ - Output: JSON with "instructions" key
51
+
52
+ 2. **generate_response_tool**
53
+ - Use when: You have instructions and need to generate corresponding responses
54
+ - Purpose: Creates appropriate responses for the given instructions
55
+ - Input: Instructions from generate_data_tool
56
+ - Output: JSON with "response" key
57
+
58
+ 3. **csv_tool**
59
+ - Use when: You have complete JSON data ready to be saved
60
+ - Purpose: Converts JSON data to CSV format and saves it
61
+ - Input: Complete JSON data with both instructions and responses
62
+ - Output: Saves data to "Data_File.csv"
63
+
64
+ ### Tool Usage Flow:
65
+ 1. First, use generate_data_tool to create instructions
66
+ 2. Then, use generate_response_tool to create corresponding responses
67
+ 3. Finally, you MUST use csv_tool to save the complete dataset
68
+
69
+ Remember to always maintain the correct JSON structure throughout the process.
70
+
71
+ IMPORTANT: Only provide JSON output without any additional text before or after the JSON structure except of 'Json'. Do not include any explanatory text, markdown formatting, or other content outside the JSON object.
72
+ If user dose not mention the number of rows then default to 10 rows.
73
+ """
74
+
75
+
76
+ query_system_prompt = """You are a **Data Generation Agent** that produces **natural language instructions** to guide the creation of fine-tuning datasets based on a user request.
77
+
78
+ ### Your Task:
79
+ 1. Understand the user's input and determine the type and topic of data required.
80
+ 2. Based on the input, generate a **single, clear instruction** for creating a dataset. The instruction should describe what kind of data to generate, in natural and concise language.
81
+ 3. If the number of rows is not explicitly mentioned, default to **10 rows**.
82
+ 4. **Only return a string** with one key: `"instructions"`.
83
+
84
+ ---
85
+
86
+ ### Output Format:
87
+
88
+ "instructions": "Generate 1000 rows of employee salary data based on..."
89
+
90
+
91
+ """
92
+ response_system_prompt = """
93
+ You are a **Data Generation Agent** responsible for generating **structured data responses** based on the given instructions.
94
+
95
+ ---
96
+
97
+ ### Your Task:
98
+ 1. Read and understand the provided **instructions**.
99
+ 2. Generate the appropriate **data or description** that directly fulfills the instructions.
100
+ 3. Return only a **JSON string** with one key: `"response"`.
101
+
102
+ ---
103
+
104
+ ### Output Format:
105
+
106
+ "response": "Here is the data..."
107
+
108
+
109
+ """
110
+
111
+
112
+
113
+ def generate_data(query : str) -> str:
114
+ try:
115
+ query_llm = ChatGoogleGenerativeAI(
116
+ model="gemini-2.0-flash ", # gemini-2.0-flash or gemini-2.5-flash-preview-04-17
117
+ api_key=GOOGLE_API_KEY,
118
+ temperature=0.9,
119
+ )
120
+ query_prompt = ChatPromptTemplate.from_messages([
121
+ ("system", query_system_prompt),
122
+ ("human", "{input}"),
123
+ ("assistant", "{agent_scratchpad}"),
124
+ ])
125
+ chain: Runnable = query_prompt | query_llm
126
+ result = chain.invoke({"input": query, "agent_scratchpad": ""})
127
+ return result
128
+ except Exception as e:
129
+ print(f"Error in generate_data: {str(e)}")
130
+ raise
131
+
132
+ def generate_response(instructions : str) -> str:
133
+ try:
134
+ response_llm = ChatGoogleGenerativeAI(
135
+ model="gemini-2.0-flash",
136
+ api_key=GOOGLE_API_KEY,
137
+ temperature=0.9,
138
+ )
139
+ response_prompt = ChatPromptTemplate.from_messages([
140
+ ("system", response_system_prompt),
141
+ ("human", "{instructions}"),
142
+ ("assistant", "{agent_scratchpad}"),
143
+ ])
144
+ chain: Runnable = response_prompt | response_llm
145
+ result = chain.invoke({"instructions": instructions, "agent_scratchpad": ""})
146
+ return result
147
+ except Exception as e:
148
+ print(f"Error in generate_response: {str(e)}")
149
+ raise
150
+
151
+ def save_to_csv(data: str):
152
+ try:
153
+ # Clean the input string by removing triple quotes
154
+
155
+ print(f"here is the data {data}")
156
+ # Parse the JSON string into a Python dictionary
157
+ if ('json') in data:
158
+ print(f"json is in data")
159
+ if "```" in data:
160
+ print(f"``` is in data")
161
+ data = data.replace("json", "").strip()
162
+ data = data.replace("```", "").strip()
163
+ data_dict = json.loads(data)
164
+ df = pd.DataFrame()
165
+ df['instructions'] = data_dict['instructions']
166
+ df['response'] = data_dict['response']
167
+
168
+ print(f"DataFrame shape: {df.shape}")
169
+ print(f"DataFrame columns: {df.columns.tolist()}")
170
+ print(f"here is df {df}")
171
+ # Save to CSV without index
172
+ print("\nSaving to CSV...")
173
+ output_path = "Data_File.csv"
174
+ df.to_csv(output_path, index=False)
175
+ return f"File scucessfully created"
176
+ else:
177
+ print(f"``` is not in data")
178
+ data = data.replace("json", "").strip()
179
+ print(f"data is {data}")
180
+ data_dict = json.loads(data)
181
+ print(f"data_dict is {data_dict}")
182
+ df = pd.DataFrame()
183
+ df['instructions'] = data_dict['instructions']
184
+ df['response'] = data_dict['response']
185
+
186
+ print(f"DataFrame shape: {df.shape}")
187
+ print(f"DataFrame columns: {df.columns.tolist()}")
188
+ print(f"here is df {df}")
189
+ # Save to CSV without index
190
+ print("\nSaving to CSV...")
191
+ output_path = "Data_File.csv"
192
+ df.to_csv(output_path, index=False)
193
+ return f"File scucessfully created"
194
+ # Convert the dictionary to a DataFrame
195
+ elif "```" in data:
196
+ print(f"``` is in data")
197
+ data = data.replace("```", "").strip()
198
+ data = json.loads(data)
199
+ df = pd.DataFrame()
200
+ df['instructions'] = data['instructions']
201
+ df['response'] = data['response']
202
+
203
+ # Save to CSV without index
204
+ print("\nSaving to CSV...")
205
+ output_path = "Data_File.csv"
206
+ df.to_csv(output_path, index=False)
207
+ return f"File created successfully"
208
+ else:
209
+ print(f"''' is not in data")
210
+ data = json.loads(data)
211
+ df = pd.DataFrame()
212
+ df['instructions'] = data['instructions']
213
+ df['response'] = data['response']
214
+
215
+ # Save to CSV without index
216
+ print("\nSaving to CSV...")
217
+ output_path = "Data_File.csv"
218
+ df.to_csv(output_path, index=False)
219
+ return f"File created successfully"
220
+
221
+
222
+ except json.JSONDecodeError as e:
223
+ raise ValueError(f"Invalid JSON data: {str(e)}")
224
+
225
+
226
+
227
+ generate_data_tool = Tool(
228
+ name="generate_data_tool",
229
+ description="Generate the data(Instructions) for the query",
230
+ func=generate_data
231
+ )
232
+ generate_response_tool = Tool(
233
+ name="generate_response_tool",
234
+ description="Generate the data(Response) for the instructions",
235
+ func=generate_response
236
+ )
237
+
238
+
239
+ csv_tool = Tool(
240
+ name="csv_tool",
241
+ description="Pass the JSON data after generating both instructions and responses, convert it into csv then save the csv file",
242
+ func=save_to_csv
243
+ )
244
+
245
+ tools = [generate_data_tool, generate_response_tool,csv_tool]
246
+
247
+
248
+
249
+
250
+ query_prompt = ChatPromptTemplate.from_messages([
251
+ ("system", system_prompt),
252
+ ("human", "{input}"),
253
+ MessagesPlaceholder(variable_name="agent_scratchpad")
254
+ ])
255
+
256
+ llm = ChatGoogleGenerativeAI(
257
+ model="gemini-2.0-flash",
258
+ api_key=GOOGLE_API_KEY,
259
+ temperature=0.9,
260
+ )
261
+
262
+ agent = create_openai_tools_agent(llm=llm, prompt=query_prompt, tools=tools)
263
+ data_agent = AgentExecutor(agent=agent, tools=tools, verbose=True)
264
+
265
+
266
+ def generate_data_agent(query: str):
267
+ try:
268
+ if not query:
269
+ return {"status": "error", "message": "Query cannot be empty", "csv_file": None}
270
+
271
+ print(f"Processing query: {query}")
272
+ result = data_agent.invoke({"input": query})
273
+ print(f"Agent execution result: {result['output']}")
274
+ save_to_csv(result['output'])
275
+ print(f"here is the result {type(result['output'])}")
276
+ # Check if data.csv was created
277
+ if os.path.exists("Data_File.csv"):
278
+ return {
279
+ "status": "success",
280
+ "message": "Created successfully! You can download the CSV file below.",
281
+ "csv_file": "Data_File.csv"
282
+ }
283
+ else:
284
+ return {
285
+ "status": "error",
286
+ "message": "Failed to generate data file",
287
+ "csv_file": None
288
+ }
289
+ except Exception as e:
290
+ print(f"Error in generate_data_agent: {str(e)}")
291
+ return {
292
+ "status": "error",
293
+ "message": f"An error occurred: {str(e)}",
294
+ "csv_file": None
295
+ }
296
+
297
+
298
+
299
+
300
+
301
+
302
+
303
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile CHANGED
@@ -1,20 +1,20 @@
1
- # Use Python 3.13 as base image
2
- FROM python:3.13-slim
3
-
4
- # Set working directory
5
- WORKDIR /app
6
-
7
- # Copy requirements first to leverage Docker cache
8
- COPY requirements.txt .
9
-
10
- # Install dependencies
11
- RUN pip install --no-cache-dir -r requirements.txt
12
-
13
- # Copy the rest of the application
14
- COPY . .
15
-
16
- # Expose the port the app runs on
17
- EXPOSE 7860
18
-
19
- # Command to run the application
20
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ # Use Python 3.10 as base image
2
+ FROM python:3.10-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Copy requirements first to leverage Docker cache
8
+ COPY requirements.txt .
9
+
10
+ # Install dependencies
11
+ RUN pip install --no-cache-dir -r requirements.txt
12
+
13
+ # Copy the rest of the application
14
+ COPY . .
15
+
16
+ # Expose the port the app runs on
17
+ EXPOSE 7860
18
+
19
+ # Command to run the application
20
  CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,120 +1,112 @@
1
- ---
2
- license: mit
3
- title: Fine Tune Data Generation Agent
4
- sdk: docker
5
- colorFrom: blue
6
- colorTo: blue
7
- short_description: Generate your Fine-Tune Dataset only with one Query using AI
8
- ---
9
- # Data Generation Agent
10
-
11
- A LangChain-based agent that automatically generates diverse training data for fine-tuning LLM models. While optimized for customer support conversations, it can generate any type of instruction-response pairs, including but not limited to:
12
- - Customer service interactions
13
- - Technical support dialogues
14
- - Product inquiries
15
- - FAQ responses
16
- - Educational content
17
- - Code explanations
18
- - Creative writing prompts
19
-
20
- ![](image1.png)
21
-
22
- ![](image2.png)
23
-
24
- The agent creates structured data pairs (instructions and responses) in JSON format and saves them to CSV, making it easy to prepare training data for language models.
25
-
26
- ## Features
27
-
28
- - Generates structured data in JSON format
29
- - Supports custom data generation instructions
30
- - Automatically saves data to CSV format
31
- - Uses Google Gemini models (via `langchain-google-genai`) for data generation
32
- - Implements a two-step process: instruction generation and response generation
33
- - Versatile data generation for any domain or use case
34
- - Customizable output format and structure
35
-
36
- ## Prerequisites
37
-
38
- - Python 3.x
39
- - OpenAI API key
40
- - Required Python packages (install via pip):
41
- ```bash
42
- pip install langchain langchain-openai pandas numpy matplotlib python-dotenv
43
- ```
44
-
45
- ## Environment Setup
46
-
47
- 1. Create a `.env` file in the project root
48
- 2. Add your OpenAI API key:
49
- ```
50
- OPENAI_API_KEY=your_api_key_here
51
- ```
52
-
53
- ## Code Structure
54
-
55
- ### Main Components
56
-
57
- 1. **System Prompts**
58
- - `system_prompt`: Main agent prompt for overall data generation
59
- - `query_system_prompt`: Prompt for generating instructions
60
- - `response_system_prompt`: Prompt for generating responses
61
-
62
- 2. **Core Functions**
63
- - `generate_data(query)`: Generates instructions based on user query
64
- - `generate_response(instructions)`: Generates responses based on instructions
65
- - `save_to_csv(data)`: Saves generated data to CSV file
66
-
67
- 3. **Tools**
68
- - `generate_data_tool`: Tool for instruction generation
69
- - `generate_response_tool`: Tool for response generation
70
- - `csv_tool`: Tool for saving data to CSV
71
-
72
- ### Usage Example
73
-
74
- ```python
75
- query = "provide me amx customer support data atleast 100 rows"
76
- result = data_agent.invoke({"input": query})
77
- ```
78
-
79
- ## Output Format
80
-
81
- The agent generates data in the following JSON format:
82
- ```json
83
- {
84
- "instructions": ["instruction1", "instruction2", ...],
85
- "response": ["response1", "response2", ...]
86
- }
87
- ```
88
-
89
- ## Data Generation Process
90
-
91
- 1. **Instruction Generation**
92
- - Takes user query as input
93
- - Generates natural language instructions
94
- - Returns JSON with "instructions" key
95
-
96
- 2. **Response Generation**
97
- - Takes instructions as input
98
- - Generates corresponding responses
99
- - Returns JSON with "response" key
100
-
101
- 3. **Data Storage**
102
- - Converts JSON data to DataFrame
103
- - Saves to CSV file named "Data_File.csv"
104
-
105
- ## Configuration
106
-
107
- - Model: `gemini-2.0-flash` (configurable via `model` parameter)
108
- - Temperature: 0.9 (configurable)
109
- - Default row count: 10 (if not specified in query)
110
-
111
- ## Error Handling
112
-
113
- The code includes basic error handling for:
114
- - JSON parsing
115
- - CSV file operations
116
- - API calls
117
-
118
- ## Contributing
119
-
120
- Feel free to submit issues and enhancement requests!
 
1
+ # Data Generation Agent
2
+
3
+ A LangChain-based agent that automatically generates diverse training data for fine-tuning LLM models. While optimized for customer support conversations, it can generate any type of instruction-response pairs, including but not limited to:
4
+ - Customer service interactions
5
+ - Technical support dialogues
6
+ - Product inquiries
7
+ - FAQ responses
8
+ - Educational content
9
+ - Code explanations
10
+ - Creative writing prompts
11
+
12
+ ![](image1.png)
13
+
14
+ ![](image2.png)
15
+
16
+ The agent creates structured data pairs (instructions and responses) in JSON format and saves them to CSV, making it easy to prepare training data for language models.
17
+
18
+ ## Features
19
+
20
+ - Generates structured data in JSON format
21
+ - Supports custom data generation instructions
22
+ - Automatically saves data to CSV format
23
+ - Uses Google Gemini models (via `langchain-google-genai`) for data generation
24
+ - Implements a two-step process: instruction generation and response generation
25
+ - Versatile data generation for any domain or use case
26
+ - Customizable output format and structure
27
+
28
+ ## Prerequisites
29
+
30
+ - Python 3.x
31
+ - Google API key (used for the Gemini models)
32
+ - Required Python packages (install via pip):
33
+ ```bash
34
+ pip install langchain langchain-openai langchain-google-genai langchain-nvidia-ai-endpoints pandas numpy matplotlib python-dotenv
35
+ ```
36
+
37
+ ## Environment Setup
38
+
39
+ 1. Create a `.env` file in the project root
40
+ 2. Add your Google API key:
41
+ ```
42
+ GOOGLE_API_KEY=your_api_key_here
43
+ ```
44
+
45
+ ## Code Structure
46
+
47
+ ### Main Components
48
+
49
+ 1. **System Prompts**
50
+ - `system_prompt`: Main agent prompt for overall data generation
51
+ - `query_system_prompt`: Prompt for generating instructions
52
+ - `response_system_prompt`: Prompt for generating responses
53
+
54
+ 2. **Core Functions**
55
+ - `generate_data(query)`: Generates instructions based on user query
56
+ - `generate_response(instructions)`: Generates responses based on instructions
57
+ - `save_to_csv(data)`: Saves generated data to CSV file
58
+
59
+ 3. **Tools**
60
+ - `generate_data_tool`: Tool for instruction generation
61
+ - `generate_response_tool`: Tool for response generation
62
+ - `csv_tool`: Tool for saving data to CSV
63
+
64
+ ### Usage Example
65
+
66
+ ```python
67
+ query = "provide me amx customer support data atleast 100 rows"
68
+ result = data_agent.invoke({"input": query})
69
+ ```
70
+
71
+ ## Output Format
72
+
73
+ The agent generates data in the following JSON format:
74
+ ```json
75
+ {
76
+ "instructions": ["instruction1", "instruction2", ...],
77
+ "response": ["response1", "response2", ...]
78
+ }
79
+ ```
80
+
81
+ ## Data Generation Process
82
+
83
+ 1. **Instruction Generation**
84
+ - Takes user query as input
85
+ - Generates natural language instructions
86
+ - Returns JSON with "instructions" key
87
+
88
+ 2. **Response Generation**
89
+ - Takes instructions as input
90
+ - Generates corresponding responses
91
+ - Returns JSON with "response" key
92
+
93
+ 3. **Data Storage**
94
+ - Converts JSON data to DataFrame
95
+ - Saves to CSV file named "Data_File.csv"
96
+
97
+ ## Configuration
98
+
99
+ - Model: `gemini-2.0-flash` (configurable via `model` parameter)
100
+ - Temperature: 0.9 (configurable)
101
+ - Default row count: 10 (if not specified in query)
102
+
103
+ ## Error Handling
104
+
105
+ The code includes basic error handling for:
106
+ - JSON parsing
107
+ - CSV file operations
108
+ - API calls
109
+
110
+ ## Contributing
111
+
112
+ Feel free to submit issues and enhancement requests!
 
 
 
 
 
 
 
 
main.py CHANGED
@@ -1,88 +1,83 @@
1
- import fastapi
2
- from fastapi import FastAPI, HTTPException, Request
3
- from fastapi.responses import FileResponse, JSONResponse
4
- from fastapi.staticfiles import StaticFiles
5
- import json
6
- from fastapi.responses import HTMLResponse
7
- from fastapi.templating import Jinja2Templates
8
- import uvicorn
9
- from pydantic import BaseModel
10
- from typing import List, Optional
11
- from Data_Geneartion_Agent import generate_data_agent
12
- import os
13
- from dotenv import load_dotenv
14
-
15
- app = FastAPI()
16
- from fastapi.middleware.cors import CORSMiddleware
17
-
18
- app.add_middleware(
19
- CORSMiddleware,
20
- allow_origins=["*"], # or your Netlify frontend URL
21
- allow_credentials=True,
22
- allow_methods=["*"],
23
- allow_headers=["*"],
24
- )
25
- # Mount static files
26
- templates = Jinja2Templates(directory="templates")
27
- # app.mount("/static", StaticFiles(directory="static"), name="static")
28
-
29
- class Query(BaseModel):
30
- # api_key: str
31
- query: str
32
-
33
- class Result(BaseModel):
34
- status: str
35
- message: str
36
- csv_file: Optional[str] = None
37
-
38
- def save_api_key(api_key: str):
39
- """Save API key to .env file"""
40
- # with open(".env", "w") as f:
41
- # f.write(f"GOOGLE_API_KEY={api_key}\n")
42
- # with open("runtime_env.json", "w") as f:
43
- # json.dump({"API_KEY": api_key}, f)
44
- # Reload environment variables
45
- load_dotenv()
46
-
47
- @app.get("/",response_class=HTMLResponse)
48
- async def root(request: Request):
49
- return templates.TemplateResponse("index.html",{"request":request})
50
-
51
- # class ApiKey(BaseModel):
52
- # api_key: str
53
-
54
- # @app.post("/api/save_api_key")
55
- # async def save_api_key(api_key: ApiKey):
56
- # with open(".env", "w") as f:
57
- # f.write(f"GOOGLE_API_KEY={api_key.api_key}")
58
- # return JSONResponse(content={"message": "API key saved successfully"}, status_code=200)
59
-
60
- @app.post("/generate")
61
- async def generate(query: Query):
62
- try:
63
- # Save the API key to .env file
64
- # print(f"Here is api_key in main.py {query.api_key}")
65
-
66
- # save_api_key(query.api_key)
67
-
68
- # Generate data using the agent with the new API key
69
- result = generate_data_agent(query.query)
70
- print(f"Here is the final result {result}")
71
- return result
72
- except Exception as e:
73
- raise HTTPException(status_code=500, detail=str(e))
74
-
75
- @app.get("/download/{filename}")
76
- async def download_file(filename: str):
77
- try:
78
- print(f"Here is file filename in main.py {filename}")
79
- return FileResponse(
80
- path=filename,
81
- filename=filename,
82
- media_type="text/csv"
83
- )
84
- except Exception as e:
85
- raise HTTPException(status_code=404, detail="File not found")
86
-
87
- if __name__ == "__main__":
88
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
1
+ import fastapi
2
+ from fastapi import FastAPI, HTTPException, Request
3
+ from fastapi.responses import FileResponse, JSONResponse
4
+ from fastapi.staticfiles import StaticFiles
5
+ from fastapi.responses import HTMLResponse
6
+ from fastapi.templating import Jinja2Templates
7
+ import uvicorn
8
+ from pydantic import BaseModel
9
+ from typing import List, Optional
10
+ from Data_Geneartion_Agent import generate_data_agent
11
+ import os
12
+ from dotenv import load_dotenv
13
+
14
+ app = FastAPI()
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+
17
+ app.add_middleware(
18
+ CORSMiddleware,
19
+ allow_origins=["*"], # or your Netlify frontend URL
20
+ allow_credentials=True,
21
+ allow_methods=["*"],
22
+ allow_headers=["*"],
23
+ )
24
+ # Mount static files
25
+ templates = Jinja2Templates(directory="templates")
26
+ app.mount("/static", StaticFiles(directory="static"), name="static")
27
+
28
+ class Query(BaseModel):
29
+ api_key: str
30
+ query: str
31
+
32
+ class Result(BaseModel):
33
+ status: str
34
+ message: str
35
+ csv_file: Optional[str] = None
36
+
37
+ def save_api_key(api_key: str):
38
+ """Save API key to .env file"""
39
+ with open(".env", "w") as f:
40
+ f.write(f"GOOGLE_API_KEY={api_key}\n")
41
+ # Reload environment variables
42
+ load_dotenv()
43
+
44
+ @app.get("/",response_class=HTMLResponse)
45
+ async def root(request: Request):
46
+ return templates.TemplateResponse("index.html",{"request":request})
47
+
48
+ # class ApiKey(BaseModel):
49
+ # api_key: str
50
+
51
+ # @app.post("/api/save_api_key")
52
+ # async def save_api_key(api_key: ApiKey):
53
+ # with open(".env", "w") as f:
54
+ # f.write(f"GOOGLE_API_KEY={api_key.api_key}")
55
+ # return JSONResponse(content={"message": "API key saved successfully"}, status_code=200)
56
+
57
+ @app.post("/generate")
58
+ async def generate(query: Query):
59
+ try:
60
+ # Save the API key to .env file
61
+ save_api_key(query.api_key)
62
+
63
+ # Generate data using the agent with the new API key
64
+ result = generate_data_agent(query.query)
65
+ print(f"Here is the final result {result}")
66
+ return result
67
+ except Exception as e:
68
+ raise HTTPException(status_code=500, detail=str(e))
69
+
70
+ @app.get("/download/{filename}")
71
+ async def download_file(filename: str):
72
+ try:
73
+ print(f"Here is file filename in main.py {filename}")
74
+ return FileResponse(
75
+ path=filename,
76
+ filename=filename,
77
+ media_type="text/csv"
78
+ )
79
+ except Exception as e:
80
+ raise HTTPException(status_code=404, detail="File not found")
81
+
82
+ if __name__ == "__main__":
83
+ uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
 
 
requirements.txt CHANGED
@@ -1,12 +1,11 @@
1
- fastapi>=0.109.0
2
- uvicorn>=0.27.0
3
- python-dotenv>=1.0.0
4
- jinja2>=3.1.3
5
- pydantic>=2.6.0
6
- pandas>=2.2.0
7
- numpy>=1.26.4
8
-
9
- # ✅ Compatible Langchain stack
10
- langchain>=0.1.0
11
- langchain-google-genai>=0.0.6
12
- langchain-core>=0.1.10
 
1
+ fastapi==0.104.1
2
+ uvicorn==0.24.0
3
+ python-dotenv==1.0.0
4
+ jinja2==3.1.2
5
+ pydantic==2.4.2
6
+ pandas==1.5.3
7
+ numpy==1.26.2
8
+ langchain==0.0.350
9
+ langchain-google-genai==0.0.5
10
+ langchain-core==0.1.10
11
+ langchain-openai==0.0.5
 
runtime_env.json DELETED
@@ -1 +0,0 @@
1
- {"API_KEY": "AIzaSyCM58jXv44b9TjbvLUzsTZ2secXHcwT-AI"}
 
 
templates/index.html DELETED
@@ -1,495 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Fine Tune Data Generation Agent</title>
7
- <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
8
- <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
9
- <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/4.1.1/animate.min.css">
10
- <style>
11
- :root {
12
- --primary-color: #8B5CF6;
13
- --secondary-color: #EC4899;
14
- --accent-color: #10B981;
15
- --text-color: #ffffff;
16
- --bg-color: #0F172A;
17
- --chat-bg: #1E293B;
18
- --nav-bg: rgba(15, 23, 42, 0.95);
19
- }
20
-
21
- body {
22
- background-color: var(--bg-color);
23
- color: var(--text-color);
24
- min-height: 100vh;
25
- position: relative;
26
- overflow-x: hidden;
27
- font-family: 'Inter', sans-serif;
28
- padding-top: 70px;
29
- }
30
-
31
- .navbar {
32
- background-color: var(--nav-bg);
33
- backdrop-filter: blur(10px);
34
- border-bottom: 1px solid rgba(255, 255, 255, 0.1);
35
- padding: 0.5rem 1rem;
36
- }
37
-
38
- .nav-note {
39
- font-size: 0.85rem;
40
- color: #ff6b6b;
41
- padding: 0.5rem;
42
- border-radius: 0.5rem;
43
- background: rgba(255, 107, 107, 0.1);
44
- border-left: 3px solid #ff6b6b;
45
- }
46
-
47
- .container {
48
- position: relative;
49
- z-index: 2;
50
- max-width: 1200px;
51
- margin: 0 auto;
52
- padding: 0 20px;
53
- height: calc(100vh - 70px);
54
- display: flex;
55
- flex-direction: column;
56
- }
57
-
58
- .chat-container {
59
- flex: 1;
60
- overflow-y: auto;
61
- padding: 1.5rem 2rem;
62
- margin-bottom: 100px;
63
- position: relative;
64
- z-index: 2;
65
- }
66
-
67
- .message {
68
- display: flex;
69
- margin-bottom: 1.5rem;
70
- animation: slideIn 0.3s ease-out;
71
- position: relative;
72
- z-index: 3;
73
- }
74
-
75
- .message.user {
76
- justify-content: flex-end;
77
- }
78
-
79
- .message-content {
80
- max-width: 70%;
81
- padding: 1rem 1.5rem;
82
- border-radius: 1rem;
83
- position: relative;
84
- transform-style: preserve-3d;
85
- transition: transform 0.3s ease;
86
- }
87
-
88
- .message.user .message-content {
89
- background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
90
- margin-left: auto;
91
- }
92
-
93
- .message.bot .message-content {
94
- background: var(--chat-bg);
95
- border: 1px solid rgba(255, 255, 255, 0.1);
96
- }
97
-
98
- .message-content:hover {
99
- transform: translateZ(20px) rotateX(5deg);
100
- }
101
-
102
- .query-box {
103
- position: fixed;
104
- bottom: 0;
105
- left: 0;
106
- right: 0;
107
- padding: 1.5rem;
108
- background: rgba(15, 23, 42, 0.95);
109
- backdrop-filter: blur(10px);
110
- border-top: 1px solid rgba(255, 255, 255, 0.1);
111
- z-index: 10;
112
- }
113
-
114
- .query-input {
115
- background-color: var(--chat-bg);
116
- border: 2px solid rgba(139, 92, 246, 0.3);
117
- color: var(--text-color);
118
- padding: 1.2rem;
119
- border-radius: 1rem;
120
- font-size: 1.1rem;
121
- transition: all 0.3s ease;
122
- }
123
-
124
- .query-input:focus {
125
- border-color: var(--primary-color);
126
- box-shadow: 0 0 0 3px rgba(139, 92, 246, 0.2);
127
- outline: none;
128
- }
129
-
130
- .btn-primary {
131
- background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
132
- border: none;
133
- padding: 1.2rem 2rem;
134
- border-radius: 1rem;
135
- font-weight: 600;
136
- transition: all 0.3s ease;
137
- }
138
-
139
- .btn-primary:hover {
140
- transform: translateY(-2px);
141
- box-shadow: 0 5px 15px rgba(139, 92, 246, 0.4);
142
- }
143
-
144
- /* Enhanced Download Button */
145
- .download-btn {
146
- position: relative;
147
- overflow: hidden;
148
- transition: all 0.4s ease;
149
- display: inline-block;
150
- z-index: 20;
151
- }
152
-
153
- .download-btn::before {
154
- content: '';
155
- position: absolute;
156
- top: 0;
157
- left: -100%;
158
- width: 100%;
159
- height: 100%;
160
- background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
161
- transition: 0.5s;
162
- z-index: -1;
163
- }
164
-
165
- .download-btn:hover::before {
166
- left: 100%;
167
- }
168
-
169
- .download-btn:hover {
170
- transform: translateY(-3px);
171
- box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
172
- }
173
-
174
- .download-btn:active {
175
- transform: translateY(1px);
176
-
177
-
178
-
179
- }
180
-
181
- .download-btn .fa-download {
182
- transition: transform 0.3s ease;
183
- }
184
-
185
- .download-btn:hover .fa-download {
186
- transform: translateY(-3px);
187
- }
188
-
189
- .pulse {
190
- animation: pulse 2s infinite;
191
- }
192
-
193
- @keyframes pulse {
194
- 0% {
195
- box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
196
- }
197
- 70% {
198
- box-shadow: 0 0 0 10px rgba(16, 185, 129, 0);
199
- }
200
- 100% {
201
- box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
202
- }
203
- }
204
-
205
- .floating-particles {
206
- position: fixed;
207
- top: 0;
208
- left: 0;
209
- width: 100%;
210
- height: 100%;
211
- pointer-events: none;
212
- z-index: 1;
213
- }
214
-
215
- /* Make sure canvas doesn't block clicks */
216
- .floating-particles canvas {
217
- pointer-events: none !important;
218
- }
219
-
220
- .loader {
221
- display: inline-block;
222
- width: 20px;
223
- height: 20px;
224
- border: 3px solid rgba(255,255,255,.3);
225
- border-radius: 50%;
226
- border-top-color: var(--accent-color);
227
- animation: spin 1s ease-in-out infinite;
228
- margin-right: 10px;
229
- vertical-align: middle;
230
- }
231
-
232
- @keyframes spin {
233
- to { transform: rotate(360deg); }
234
- }
235
-
236
- @keyframes slideIn {
237
- from {
238
- opacity: 0;
239
- transform: translateY(20px);
240
- }
241
- to {
242
- opacity: 1;
243
- transform: translateY(0);
244
- }
245
- }
246
- </style>
247
- </head>
248
- <body>
249
- <!-- Navbar -->
250
- <nav class="navbar fixed-top navbar-expand-lg">
251
- <div class="container-fluid">
252
- <a class="navbar-brand d-flex align-items-center" href="#">
253
- <i class="fas fa-database me-2"></i>
254
- <span class="fw-bold">Fine Tune Data Generator</span>
255
- </a>
256
- <div class="d-flex">
257
- <div class="nav-note me-3">
258
- <i class="fas fa-exclamation-circle me-2"></i>
259
- If you find errors (similar wrong format or unable to detect model), please try again later or contact the developer
260
- </div>
261
- <!-- <div class="api-key-input">
262
- <input type="password" id="apiKeyInput" class="form-control" placeholder="Enter Gemini API Key" style="background-color: var(--chat-bg); color: var(--text-color); border: 1px solid rgba(255, 255, 255, 0.1);">
263
- </div> -->
264
- </div>
265
- </div>
266
- </nav>
267
-
268
- <div class="floating-particles" id="particles"></div>
269
-
270
- <div class="container">
271
- <div class="chat-container" id="chatContainer">
272
- <div class="message bot">
273
- <div class="message-content">
274
-
275
- <h3><i class="fas fa-robot me-2"></i>Welcome to Fine Tune Data Generation Agent</h3>
276
- <p>I'm here to help you generate datasets. What would you like to create?</p>
277
- </div>
278
- </div>
279
- </div>
280
-
281
-
282
- <div class="query-box">
283
- <form id="queryForm" class="d-flex">
284
- <input type="text" id="queryInput" class="form-control query-input" placeholder="Enter your query to generate dataset...">
285
- <button type="submit" class="btn btn-primary ms-2">
286
- <i class="fas fa-paper-plane me-2"></i>Send
287
- </button>
288
- </form>
289
- <div class="contact-links mt-2 text-center" style="font-size: 0.9rem;">
290
- <a href="https://www.linkedin.com/in/harsimransinghtech/" target="_blank" class="text-light me-3" style="text-decoration: none;">
291
- <i class="fab fa-linkedin"></i> LinkedIn
292
- </a>
293
- <a href="https://github.com/harsimran726" target="_blank" class="text-light" style="text-decoration: none;">
294
- <i class="fab fa-github"></i> GitHub
295
- </a>
296
- </div>
297
- </div>
298
- </div>
299
-
300
- <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script>
301
- <script src="https://cdn.jsdelivr.net/npm/three@0.132.2/build/three.min.js"></script>
302
- <script>
303
- // 3D Particles Animation with fixed z-index
304
- const scene = new THREE.Scene();
305
- const camera = new THREE.PerspectiveCamera(75, window.innerWidth / window.innerHeight, 0.1, 1000);
306
- const renderer = new THREE.WebGLRenderer({
307
- alpha: true,
308
- antialias: true
309
- });
310
- const particlesContainer = document.getElementById('particles');
311
-
312
- renderer.setSize(window.innerWidth, window.innerHeight);
313
- renderer.domElement.style.position = 'fixed';
314
- renderer.domElement.style.top = '0';
315
- renderer.domElement.style.left = '0';
316
- renderer.domElement.style.zIndex = '1';
317
- renderer.domElement.style.pointerEvents = 'none'; // Ensure no interaction
318
- particlesContainer.appendChild(renderer.domElement);
319
-
320
- const particles = [];
321
- const particleCount = 25; // Reduced for performance
322
-
323
- for (let i = 0; i < particleCount; i++) {
324
- const geometry = new THREE.IcosahedronGeometry(0.15, 1); // Smoother shape
325
- const material = new THREE.MeshBasicMaterial({
326
- color: new THREE.Color(
327
- Math.random() * 0.5 + 0.5,
328
- Math.random() * 0.5,
329
- Math.random() * 0.5 + 0.5
330
- ),
331
- transparent: true,
332
- opacity: 0.4
333
- });
334
- const particle = new THREE.Mesh(geometry, material);
335
-
336
- particle.position.x = Math.random() * 10 - 5;
337
- particle.position.y = Math.random() * 10 - 5;
338
- particle.position.z = Math.random() * 10 - 5;
339
-
340
- particles.push(particle);
341
- scene.add(particle);
342
- }
343
-
344
- camera.position.z = 5;
345
-
346
- function animate() {
347
- requestAnimationFrame(animate);
348
-
349
- particles.forEach(particle => {
350
- particle.rotation.x += 0.005;
351
- particle.rotation.y += 0.005;
352
- particle.position.y += Math.sin(Date.now() * 0.001) * 0.005;
353
- });
354
-
355
- renderer.render(scene, camera);
356
- }
357
-
358
- animate();
359
-
360
- // Form submission handling
361
- document.getElementById('queryForm').addEventListener('submit', async (e) => {
362
- e.preventDefault();
363
- const query = document.getElementById('queryInput').value.trim();
364
- // const apiKey = document.getElementById('apiKeyInput').value.trim();
365
-
366
- if (!query) return;
367
- // if (!apiKey) {
368
- // const errorMessage = document.createElement('div');
369
- // errorMessage.className = 'message bot';
370
- // errorMessage.innerHTML = `
371
- // <div class="message-content">
372
- // <p class="text-danger"><i class="fas fa-exclamation-triangle me-2"></i>Please enter your Gemini API Key first</p>
373
- // </div>
374
- // `;
375
- // document.getElementById('chatContainer').appendChild(errorMessage);
376
- // return;
377
- // }
378
-
379
- const chatContainer = document.getElementById('chatContainer');
380
-
381
- // Add user message
382
- const userMessage = document.createElement('div');
383
- userMessage.className = 'message user';
384
- userMessage.innerHTML = `
385
- <div class="message-content">
386
- ${query}
387
- </div>
388
- `;
389
- chatContainer.appendChild(userMessage);
390
-
391
- try {
392
- // Show loading message
393
- const loadingMessage = document.createElement('div');
394
- loadingMessage.className = 'message bot';
395
- loadingMessage.innerHTML = `
396
- <div class="message-content">
397
- <p><span class="loader"></span>Generating your dataset...</p>
398
- </div>
399
- `;
400
- chatContainer.appendChild(loadingMessage);
401
-
402
- // Clear input
403
- document.getElementById('queryInput').value = '';
404
-
405
- // Scroll to bottom
406
- chatContainer.scrollTop = chatContainer.scrollHeight;
407
-
408
- // Make API call
409
- const response = await fetch('/generate', {
410
- method: 'POST',
411
- headers: {
412
- 'Content-Type': 'application/json',
413
- },
414
- body: JSON.stringify({
415
- query: query,
416
- // api_key: apiKey
417
- })
418
- });
419
-
420
- if (!response.ok) {
421
- throw new Error('Failed to generate data. Server responded with status: ' + response.status);
422
- }
423
-
424
- const result = await response.json();
425
-
426
- // Remove loading message
427
- chatContainer.removeChild(loadingMessage);
428
-
429
- // Add bot response message
430
- const botMessage = document.createElement('div');
431
- botMessage.className = 'message bot';
432
-
433
- let messageContent = `
434
- <div class="message-content">
435
- <p>${result.message}</p>
436
- `;
437
-
438
- // Add download button if CSV file is available
439
- if (result.csv_file) {
440
- messageContent += `
441
-
442
- <a href="/download/tmp/${result.csv_file}" class="btn btn-primary download-btn pulse" download="${result.csv_file}">
443
- <i class="fas fa-download me-2"></i>Download Dataset
444
- </a>
445
- `;
446
- }
447
-
448
- messageContent += `</div>`;
449
- botMessage.innerHTML = messageContent;
450
- chatContainer.appendChild(botMessage);
451
-
452
- // Scroll to bottom
453
- chatContainer.scrollTop = chatContainer.scrollHeight;
454
-
455
- // Add click handler to download button
456
- const downloadBtn = botMessage.querySelector('.download-btn');
457
- if (downloadBtn) {
458
- downloadBtn.addEventListener('click', function(e) {
459
- console.log('Download initiated:', this.href);
460
- // Optional: Track download event
461
- });
462
- }
463
-
464
- } catch (error) {
465
- // Remove loading message if it exists
466
- const loadingMessages = chatContainer.querySelectorAll('.message');
467
- const lastMessage = loadingMessages[loadingMessages.length - 1];
468
- if (lastMessage && lastMessage.querySelector('.loader')) {
469
- chatContainer.removeChild(lastMessage);
470
- }
471
-
472
- // Add error message
473
- const errorMessage = document.createElement('div');
474
- errorMessage.className = 'message bot';
475
- errorMessage.innerHTML = `
476
- <div class="message-content">
477
- <p class="text-danger"><i class="fas fa-exclamation-triangle me-2"></i>${error.message}</p>
478
- </div>
479
- `;
480
- chatContainer.appendChild(errorMessage);
481
-
482
- // Scroll to bottom
483
- chatContainer.scrollTop = chatContainer.scrollHeight;
484
- }
485
- });
486
-
487
- // Handle window resize
488
- window.addEventListener('resize', () => {
489
- camera.aspect = window.innerWidth / window.innerHeight;
490
- camera.updateProjectionMatrix();
491
- renderer.setSize(window.innerWidth, window.innerHeight);
492
- });
493
- </script>
494
- </body>
495
- </html>