Spaces:

lisekarimi
/

datagen

Sleeping

App Files Files Community

datagen / src /datagen.py

lisekarimi

Deploy version 0.1.0

db17eb5 9 months ago

raw

history blame contribute delete

2.05 kB

	"""Main data generation class for creating synthetic datasets using AI models."""

	import os
	from datetime import datetime
	from .prompts import build_user_prompt, system_message
	from .models import get_gpt_completion, get_claude_completion
	from .utils import execute_code_in_virtualenv
	from .constants import OUTPUT_DIR, logger


	class DataGen:
	    """Handles synthetic data generation using AI models.

	    A prompt is built from caller-supplied parameters, the selected model
	    (GPT or Claude) is asked for code, and that code is run through
	    ``execute_code_in_virtualenv``.
	    """

	    def __init__(self, output_dir=None):
	        """Initialize the data generator with its output directory.

	        Args:
	            output_dir: Directory handed to the prompt builder as
	                ``file_path``. Any falsy value (``None``, ``""``) falls
	                back to the ``OUTPUT_DIR`` constant.
	        """
	        self.output_dir = output_dir or OUTPUT_DIR
	        os.makedirs(self.output_dir, exist_ok=True)

	    def get_timestamp(self):
	        """Return the current local time as ``YYYYMMDD_HHMMSS`` for file naming."""
	        return datetime.now().strftime("%Y%m%d_%H%M%S")

	    def generate_dataset(self, **input_data):
	        """Generate a synthetic dataset with the model named in ``input_data``.

	        Args:
	            **input_data: Keyword arguments forwarded to
	                ``build_user_prompt``. ``model`` must be ``"GPT"`` or
	                ``"Claude"``. Any ``file_path`` entry is overwritten with
	                ``self.output_dir``.

	        Returns:
	            Whatever ``execute_code_in_virtualenv`` returns for the generated
	            code; expected to be the path of the output file.

	        Raises:
	            ValueError: If ``model`` is missing or is not a supported choice.
	            Exception: Any other failure is logged and re-raised unchanged.
	        """
	        try:
	            # Ensure the output directory exists before generating.
	            os.makedirs(self.output_dir, exist_ok=True)

	            # Resolve the model before building the prompt so that an
	            # unsupported or missing choice fails fast, without prompt work.
	            model = input_data.get("model")
	            if model == "GPT":
	                get_completion = get_gpt_completion
	            elif model == "Claude":
	                get_completion = get_claude_completion
	            else:
	                raise ValueError("Invalid model selected.")

	            # The prompt builder receives the output directory as file_path.
	            input_data["file_path"] = self.output_dir
	            prompt = build_user_prompt(**input_data)

	            # Ask the selected LLM for code, then execute it.
	            code = get_completion(prompt, system_message)
	            return execute_code_in_virtualenv(code)

	        except Exception as e:
	            # Log and re-raise any errors that occur during generation.
	            logger.error(f"Error in generate_dataset: {e}")
	            raise