Spaces:

George-API
/

phi4training

Sleeping

App Files Files Community

phi4training / app.py

George-API

Upload folder using huggingface_hub

a57357b verified 9 months ago

raw

history blame

6.27 kB

	import gradio as gr
	import os
	import subprocess
	import sys
	import json
	import re
	from threading import Thread
	import datetime
	import torch
	import threading

	def load_env_variables():
	    """Prepare environment variables for the current runtime.

	    When ``SPACE_ID`` is set to a non-empty value the app is treated as
	    running inside a Hugging Face Space: if the id contains a "/", the part
	    before the first "/" is exported as ``HF_USERNAME``. Otherwise a ``.env``
	    file sitting next to this module is loaded through python-dotenv,
	    provided the package can be imported and the file exists.
	    """
	    space_id = os.environ.get("SPACE_ID")

	    if not space_id:
	        # Not in a Space: fall back to an optional .env file beside this script.
	        try:
	            from dotenv import load_dotenv
	            module_dir = os.path.dirname(os.path.abspath(__file__))
	            env_path = os.path.join(module_dir, ".env")
	            if os.path.exists(env_path):
	                load_dotenv(env_path)
	                print(f"Loaded environment variables from {env_path}")
	        except ImportError:
	            print("python-dotenv not installed, skipping .env loading")
	        return

	    print("Running in Hugging Face Space")
	    if "/" in space_id:
	        # SPACE_ID looks like "<owner>/<space>"; the owner becomes the username.
	        username = space_id.split("/")[0]
	        os.environ["HF_USERNAME"] = username
	        print(f"Set HF_USERNAME from SPACE_ID: {username}")

	def check_environment():
	    """Collect a snapshot of the runtime environment.

	    Returns:
	        A dict with three sections:
	        "System" (platform string and Python version),
	        "GPU" (CUDA availability and device count, plus the name and total
	        memory in GB of device 0 when CUDA is available), and
	        "Environment Variables" (booleans telling whether HF_TOKEN,
	        HF_USERNAME and HF_SPACE_NAME are set to non-empty values).
	    """
	    has_cuda = torch.cuda.is_available()

	    system_section = {
	        "Platform": sys.platform,
	        "Python Version": sys.version.split()[0],
	    }

	    gpu_section = {
	        "CUDA Available": has_cuda,
	        "Device Count": torch.cuda.device_count() if has_cuda else 0,
	    }
	    if has_cuda:
	        # Only the first device (index 0) is described in detail.
	        gpu_section["Device Name"] = torch.cuda.get_device_name(0)
	        total_bytes = torch.cuda.get_device_properties(0).total_memory
	        gpu_section["Memory (GB)"] = round(total_bytes / (1024**3), 2)

	    # Report presence only, never the values themselves.
	    env_section = {
	        name: bool(os.environ.get(name))
	        for name in ("HF_TOKEN", "HF_USERNAME", "HF_SPACE_NAME")
	    }

	    return {
	        "System": system_section,
	        "GPU": gpu_section,
	        "Environment Variables": env_section,
	    }

	def run_training_process():
	    """Run ``run_transformers_training.py`` as a child process and relay its output.

	    The script is looked up in the directory of this module and executed with
	    the current interpreter. The child's stderr is merged into its stdout and
	    every output line is echoed to this process's stdout as it arrives.

	    Returns:
	        The child's exit code, or 1 if launching the process or relaying its
	        output raised an exception (the error is printed).
	    """
	    try:
	        current_dir = os.path.dirname(os.path.abspath(__file__))
	        training_script = os.path.join(current_dir, "run_transformers_training.py")

	        # Using Popen as a context manager closes the stdout pipe and waits
	        # for the child even if relaying the output raises, so neither the
	        # pipe handle nor the child process is left dangling.
	        with subprocess.Popen(
	            [sys.executable, training_script],
	            stdout=subprocess.PIPE,
	            stderr=subprocess.STDOUT,
	            text=True,
	            bufsize=1,  # line-buffered so each line is available promptly
	        ) as process:
	            for line in process.stdout:
	                # Drop only trailing whitespace/newline: leading indentation
	                # (e.g. in tracebacks) stays readable. Flush so the line shows
	                # up in the logs immediately.
	                print(line.rstrip(), flush=True)
	            process.wait()

	        return process.returncode
	    except Exception as e:
	        print(f"Error in training process: {e}")
	        return 1

	def start_training(learning_rate, num_train_epochs, per_device_train_batch_size,
	                   gradient_accumulation_steps):
	    """Store the chosen hyperparameters in the config file and launch training.

	    Reads ``transformers_config.json`` from this module's directory, overwrites
	    the four training parameters in its "training" section, sets
	    "hub_model_id" in the "huggingface_hub" section when ``HF_USERNAME`` is
	    set, writes the file back, and starts the training run on a daemon thread.

	    Returns:
	        A status message. Any exception is reported inside the returned
	        message rather than raised.
	    """
	    try:
	        load_env_variables()
	        config_path = os.path.join(
	            os.path.dirname(os.path.abspath(__file__)),
	            "transformers_config.json",
	        )

	        with open(config_path, "r") as config_file:
	            settings = json.load(config_file)

	        # Overwrite the training hyperparameters with the values passed in.
	        settings["training"].update(
	            num_train_epochs=num_train_epochs,
	            learning_rate=learning_rate,
	            per_device_train_batch_size=per_device_train_batch_size,
	            gradient_accumulation_steps=gradient_accumulation_steps,
	        )

	        # Point the hub upload at the user's namespace when it is known.
	        hf_username = os.environ.get("HF_USERNAME")
	        if hf_username:
	            settings["huggingface_hub"].update(
	                hub_model_id=f"{hf_username}/Phi4-Cognitive-Science",
	            )

	        # Persist the updated configuration before the run starts.
	        with open(config_path, "w") as config_file:
	            json.dump(settings, config_file, indent=4)

	        # Run training in the background so this call returns immediately.
	        worker = threading.Thread(target=run_training_process, daemon=True)
	        worker.start()

	        return "Training started! Check the Hugging Face Space logs for progress."
	    except Exception as e:
	        return f"Error starting training: {str(e)}"

	# Gradio UI definition.
	# NOTE(review): the nesting of the layout containers below was reconstructed
	# from statement grouping — confirm against the intended page layout.
	with gr.Blocks(title="Phi-4 Training Interface") as demo:
	    gr.Markdown("# Phi-4 Unsupervised Training for Cognitive Science")

	    # "Training" tab: static model info, hyperparameter sliders, start button.
	    with gr.Tab("Training"):
	        with gr.Row():
	            with gr.Column():
	                gr.Markdown("## Model Configuration")
	                gr.Markdown("Model: unsloth/phi-4-unsloth-bnb-4bit")
	                gr.Markdown("Dataset: George-API/cognitive-data")

	                gr.Markdown("## Training Parameters")
	                learning_rate = gr.Slider(
	                    minimum=1e-6,
	                    maximum=1e-4,
	                    value=2e-5,
	                    step=1e-6,
	                    label="Learning Rate",
	                )
	                num_train_epochs = gr.Slider(
	                    minimum=1,
	                    maximum=5,
	                    value=3,
	                    step=1,
	                    label="Number of Epochs",
	                )
	                per_device_train_batch_size = gr.Slider(
	                    minimum=4,
	                    maximum=24,
	                    value=12,
	                    step=4,
	                    label="Per Device Train Batch Size (Unsloth Optimized)",
	                )
	                gradient_accumulation_steps = gr.Slider(
	                    minimum=1,
	                    maximum=8,
	                    value=4,
	                    step=1,
	                    label="Gradient Accumulation Steps",
	                )

	                start_btn = gr.Button("Start Training", variant="primary")
	                training_output = gr.Textbox(label="Training Output", interactive=False)

	    # "Environment" tab: on-demand report produced by check_environment().
	    with gr.Tab("Environment"):
	        with gr.Row():
	            with gr.Column():
	                gr.Markdown("## Environment Information")
	                env_info = gr.JSON(label="Environment Info")
	                check_env_btn = gr.Button("Check Environment")

	    # Wire the buttons to their callbacks; the slider values are passed to
	    # start_training in the order of its parameters.
	    start_btn.click(
	        fn=start_training,
	        inputs=[
	            learning_rate,
	            num_train_epochs,
	            per_device_train_batch_size,
	            gradient_accumulation_steps,
	        ],
	        outputs=training_output,
	    )

	    check_env_btn.click(
	        fn=check_environment,
	        inputs=[],
	        outputs=env_info,
	    )

	if __name__ == "__main__":
	    # Script entry point: set up environment variables, then serve the UI.
	    load_env_variables()
	    demo.launch()