Spaces:
Build error
Build error
| import pandas as pd | |
| import os | |
| import gradio as gr | |
| import threading | |
| import time | |
| from groq import Groq | |
# Groq API client (reads GROQ_API_KEY from the environment).
client = Groq()

# Tunables.
MAX_SIZE = 1.1 * 1024 * 1024 * 1024  # per-file rollover threshold (~1.1 GB in bytes)
DATA_DIRECTORY = 'data'
UPDATE_INTERVAL = 1  # seconds between generation rounds

# Make sure the output directory exists before any writes happen.
os.makedirs(DATA_DIRECTORY, exist_ok=True)

# Mutable generator state, shared with the background writer thread.
file_index = 1
current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
file_paths = [current_file]
combined_tokens = 0
def get_file_size(filename):
    """Return the size of *filename* in bytes, or 0 if it is not a regular file."""
    if not os.path.isfile(filename):
        return 0
    return os.path.getsize(filename)
def _stream_chat(messages, max_tokens):
    """Stream one chat completion and return ``(text, word_count)``.

    ``word_count`` is a rough token proxy — the number of whitespace-separated
    words seen in the streamed chunks, not true model tokens.
    """
    completion = client.chat.completions.create(
        model="llama3-groq-70b-8192-tool-use-preview",
        messages=messages,
        temperature=1,
        max_tokens=max_tokens,
        top_p=1,
        stream=True,
        stop=None,
    )
    text = ""
    words = 0
    for chunk in completion:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            words += len(content.split())
    return text, words


def generate_and_save_data():
    """Endlessly generate (prompt, response) pairs and append them to CSV files.

    Intended to run in a daemon thread. Each round:
      1. ask the model to invent a user-style prompt,
      2. answer that prompt with a second completion,
      3. append both to the current CSV, rolling over to a new file once the
         current one reaches ``MAX_SIZE``.
    Any failure is printed and retried after 5 seconds so the loop never dies.
    """
    global file_index, current_file, file_paths, combined_tokens
    # Create the initial file with headers if it doesn't exist yet.
    if not os.path.isfile(current_file):
        pd.DataFrame(columns=["prompt", "response"]).to_csv(current_file, index=False)
    while True:
        try:
            # Step 1: have the model invent a prompt.
            prompt, prompt_tokens = _stream_chat(
                [
                    {
                        "role": "user",
                        "content": "give me a single prompt to prompt an ai model, simulating what users could want from you. ensure that it is diverse and high quality. for each, choose a random writing style (though it has to be a common one), random length and random clarity of the prompt. ensure that it is a single prompt, and just the prompt itself, nothing else. eg, don't close the prompt in quotation marks or say Here is a single prompt that meets your requirements or anything similar to that"
                    }
                ],
                max_tokens=1024,
            )
            # Step 2: answer the generated prompt.
            response, response_tokens = _stream_chat(
                [{"role": "user", "content": prompt}],
                max_tokens=5000,
            )
            # Track the running (approximate) token total for the UI.
            combined_tokens += (prompt_tokens + response_tokens)
            print("Generated prompt:", prompt)
            print("Response to the generated prompt:", response)
            data = pd.DataFrame({"prompt": [prompt], "response": [response]})
            # Roll over to a fresh file once the current one is big enough.
            # NOTE: the size check runs before the append, so a file may exceed
            # MAX_SIZE by one row.
            if get_file_size(current_file) >= MAX_SIZE:
                file_index += 1
                current_file = os.path.join(DATA_DIRECTORY, f'data{file_index}.csv')
                file_paths.append(current_file)
                # New file: write with headers.
                data.to_csv(current_file, header=True, index=False)
            else:
                # Existing file: append without repeating headers.
                data.to_csv(current_file, mode='a', header=False, index=False)
            time.sleep(UPDATE_INTERVAL)
        except Exception as e:
            # Broad catch is deliberate: this background loop must survive
            # transient API and filesystem errors.
            print(f"An error occurred: {e}. Retrying in 5 seconds...")
            time.sleep(5)
def get_available_files():
    """Return the subset of tracked data files that currently exist on disk."""
    return [path for path in file_paths if os.path.isfile(path)]
def update_file_list():
    """Refresh the dropdown's choices with the files currently on disk."""
    choices = get_available_files()
    return gr.update(choices=choices)
def update_token_count():
    """Return the running total of (approximate) tokens generated so far."""
    return combined_tokens
def display_file_content(selected_file):
    """Load *selected_file* as a DataFrame; empty DataFrame when nothing is selected."""
    if not selected_file:
        return pd.DataFrame()
    return pd.read_csv(selected_file)
# Run the generator loop in the background so the UI stays responsive;
# daemon=True lets the process exit without joining the thread.
thread = threading.Thread(target=generate_and_save_data, daemon=True)
thread.start()
# ---- Gradio UI -------------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("## AI Prompt and Response Generator")
    gr.Markdown("This app continuously generates AI prompts and responses, and writes them to CSV files.")

    file_selector = gr.Dropdown(
        label="Select a data file to view and download",
        choices=get_available_files(),
    )
    file_viewer = gr.DataFrame(label="CSV File Content")
    download_button = gr.File(label="Download Selected File")

    def _passthrough_file(selected_file):
        # The File component downloads whatever path it is handed.
        return selected_file

    refresh_button = gr.Button("Refresh File List")
    refresh_button.click(update_file_list, outputs=file_selector)

    # Selecting a file both previews it and stages it for download.
    file_selector.change(display_file_content, inputs=file_selector, outputs=file_viewer)
    file_selector.change(_passthrough_file, inputs=file_selector, outputs=download_button)

    token_display = gr.Textbox(
        label="Combined Tokens",
        value=str(update_token_count()),
        interactive=False,
    )

    def _refresh_tokens():
        return str(update_token_count())

    # Manual refresh for the token counter.
    token_refresh = gr.Button("Refresh Token Count")
    token_refresh.click(_refresh_tokens, outputs=token_display)

app.launch()