# evofartnet/app.py
import os
import json
import random
import time
import threading
import queue
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
from pathlib import Path
import warnings

import torch
import torch.nn as nn
import torchaudio.transforms as T
import torchaudio.functional as F
import gradio as gr
import numpy as np
import sounddevice as sd
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline as hf_pipeline
# --------------------------------------------------
# CONFIGURATION
# --------------------------------------------------
@dataclass
class Config:
# Paths
data_dir: Path = Path("./fart_dataset")
syn_dir: Path = data_dir / "synthetic"
log_file: Path = Path("fart-lab/logs/evolution_log.jsonl")
model_path: Path = Path("evofart_net.pt") # EvoFartNet save
# Audio
target_sr: int = 16000
chunk_len: float = 1.0
n_mels: int = 128
n_fft: int = 400
hop_length: int = 160
target_frames: int = 100
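    # Note: 1.0 s at 16 kHz with hop 160 gives ~101 STFT frames; features are padded/cropped to target_frames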
# Training (Incremental Best Practices)
batch_size: int = 32
lr: float = 0.001
    replay_size: int = 200  # Experience-replay buffer, guards against catastrophic forgetting
walk_window: int = 100
flawless_acc: float = 0.99
micro_neurons_min: int = 3
micro_neurons_max: int = 9
# LLM
    llm_model_id: str = "deepseek-ai/deepseek-r1-0528"  # Hugging Face model ID (loaded via transformers)
llm_max_tokens: int = 128
# Cartman (Uncensored Diss Tracks)
    cartman_disses: Optional[List[str]] = None
def __post_init__(self):
if self.cartman_disses is None:
self.cartman_disses = [
"This burp? Kyle's mom's morning breath—napalm level, hippie!",
"Bark like a therapy dog? I'll fart on your therapy, Butters!",
"Curse echo? 'Fahhhh-tboard!'—sounds like your dad's regrets."
]
self.syn_dir.mkdir(parents=True, exist_ok=True)
self.log_file.parent.mkdir(parents=True, exist_ok=True)
self.log_file.touch()
config = Config()
# Labels (Expanded Roast-Ready)
SOUND_LABELS = ['fart', 'burp', 'bark', 'curse', 'door_slam', 'everyday_noise', 'not_sound']
# Globals
model = None # EvoFartNet
llm_pipe = None
llm_tokenizer = None
training_active = False
accuracy = 0.0
mode = "synth"
replay_buffer = [] # (wave, label, timestamp)
q = queue.Queue()
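mic_stream = None  # Live sounddevice input stream (kept global so it isn't garbage-collected)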
# --------------------------------------------------
# LLM Setup (Micro-Neurons)
# --------------------------------------------------
def load_local_llm():
global llm_pipe, llm_tokenizer
if llm_pipe:
return
    try:
        # config.llm_model_id is a Hugging Face model ID, so load it with
        # transformers directly (the original kagglehub.model_download call
        # fails for Hub IDs). The checkpoint ships with its own quantization
        # config, so no BitsAndBytesConfig is passed here.
        print(f"[INFO] Loading LLM from Hugging Face: {config.llm_model_id}")
        llm_tokenizer = AutoTokenizer.from_pretrained(config.llm_model_id, trust_remote_code=True)
        llm_model = AutoModelForCausalLM.from_pretrained(
            config.llm_model_id,
            device_map="auto",
            trust_remote_code=True,
        )
llm_pipe = hf_pipeline("text-generation", model=llm_model, tokenizer=llm_tokenizer,
max_new_tokens=config.llm_max_tokens, do_sample=True, temperature=0.8,
pad_token_id=llm_tokenizer.eos_token_id)
print("[INFO] DeepSeek-R1 loaded—jury's in session.")
except Exception as e:
warnings.warn(f"LLM load failed: {e}. Dummy mode.")
llm_pipe = None
load_local_llm()
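# The LLM loads eagerly at import; if it fails, llm_pipe stays None and
# llm_micro_vote() falls back to random (dummy) votes.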
# --------------------------------------------------
# EvoFartNet: Deeper CNN (w/ RNN Option)
# --------------------------------------------------
class EvoFartNet(nn.Module):
def __init__(self, num_classes=len(SOUND_LABELS), use_rnn=False):
super().__init__()
self.use_rnn = use_rnn
self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
self.pool = nn.MaxPool2d(2, 2)
self.dropout = nn.Dropout(0.25)
        # Two 2x2 max-pools quarter each dim: (n_mels // 4) x (target_frames // 4) = 32 x 25
        self.flat_dim = 64 * (config.n_mels // 4) * (config.target_frames // 4)
        self.fc1 = nn.Linear(self.flat_dim, 128)
if use_rnn:
self.rnn = nn.LSTM(128, 64, batch_first=True, bidirectional=True)
self.fc2 = nn.Linear(128, num_classes) # Bi-LSTM out
else:
self.fc2 = nn.Linear(128, num_classes)
self.relu = nn.ReLU()
def forward(self, x):
x = self.pool(self.relu(self.conv1(x)))
x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, self.flat_dim)
x = self.dropout(self.relu(self.fc1(x)))
        if self.use_rnn:
            # Tile the feature vector into a short fake sequence for the LSTM demo
            x = x.unsqueeze(1).repeat(1, 8, 1)
            x, _ = self.rnn(x)
            x = x[:, -1, :]
x = self.fc2(x)
return x
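# Shape trace (defaults): (B, 1, 128, 100) -> two conv+pool blocks -> (B, 64, 32, 25)
# -> flatten (B, 51200) -> fc1 (B, 128) -> fc2 (B, len(SOUND_LABELS)).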
def load_or_init_model(use_rnn=False):
    global model
    model = EvoFartNet(use_rnn=use_rnn)
    if config.model_path.exists():
        try:
            model.load_state_dict(torch.load(config.model_path, map_location='cpu'))
            print(f"[INFO] Loaded EvoFartNet (RNN: {use_rnn}).")
        except Exception as e:
            warnings.warn(f"Checkpoint load failed ({e}); starting fresh.")
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)  # Adam: a robust default for streaming updates
    criterion = nn.CrossEntropyLoss()
    return optimizer, criterion
optimizer, criterion = load_or_init_model(use_rnn=False) # Toggle RNN for echoes
# --------------------------------------------------
# Synth Gen w/ Torchaudio Augs
# --------------------------------------------------
def generate_synthetic_sound(label: str, duration: float = config.chunk_len) -> Tuple[np.ndarray, int]:
sr = config.target_sr
t = np.linspace(0, duration, int(sr * duration), endpoint=False)
if label == 'fart':
wave = 0.3 * np.sin(2 * np.pi * 90 * t) + 0.6 * np.random.normal(0, 0.2, len(t))
elif label == 'burp':
rumble = 0.5 * np.sin(2 * np.pi * 50 * t) * np.exp(-t * 5)
pop_len = int(sr * 0.1)
pop = 0.3 * np.random.normal(0, 0.5, pop_len) * np.exp(-np.linspace(0, 10, pop_len))
pop = np.pad(pop, (0, len(t) - pop_len))
wave = rumble + pop
elif label == 'bark':
freq_mod = 800 + 400 * np.sin(2 * np.pi * 10 * t)
phase = 2 * np.pi * np.cumsum(freq_mod) / sr
wave = 0.4 * np.sin(phase) + 0.2 * np.random.normal(0, 0.3, len(t))
elif label == 'curse':
# Toe-stub symphony: 3 angry bursts w/ 0.3s echo sim
wave = np.zeros(len(t))
for i in range(3):
start = int(i * len(t)/4)
end = int((i+1) * len(t)/4)
sub_t = t[start:end] - t[start]
burst = 0.4 * np.sin(2 * np.pi * (500 + 200 * np.sin(20 * sub_t)) * sub_t) + np.random.normal(0, 0.2, end-start)
wave[start:end] += burst
            # Echo: 0.3 s delay (spills past the burst's own quarter-segment,
            # since each segment is only 0.25 s long)
            echo_start = start + int(sr * 0.3)
            echo_len = min(len(wave) - echo_start, len(burst))
            if echo_len > 0:
                wave[echo_start:echo_start + echo_len] += 0.3 * burst[:echo_len]
elif label == 'door_slam':
        impulse = np.zeros(len(t))
        impulse[0] = 1.0
decay = np.exp(-np.linspace(0, 5, int(sr * 0.5)))
wave = np.convolve(impulse, decay)[:len(t)] + 0.1 * np.random.normal(0, 1, len(t))
elif label == 'everyday_noise':
sub = random.choice(['door_slam', 'bark'])
wave, _ = generate_synthetic_sound(sub, duration)
wave += 0.2 * np.random.normal(0, 0.1, len(t))
else:
wave = 0.1 * np.random.normal(0, 0.05, len(t))
peak = np.max(np.abs(wave)) + 1e-9
return (wave / peak).astype(np.float32), sr
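# Audition sketch (assumes a local audio device is available):
#   wave, sr = generate_synthetic_sound('burp'); sd.play(wave, sr); sd.wait()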
def augment_audio(waveform: torch.Tensor, sr: int) -> torch.Tensor:
"""Torchaudio augs: Pitch shift, reverb, noise ()"""
# Pitch shift
shift = random.uniform(-2, 2) # Semitones
waveform = F.pitch_shift(waveform, sr, shift)
# Add noise
noise = torch.randn_like(waveform) * 0.05
waveform += noise
    # Simulated reverb: single echo added in place so the chunk length is unchanged
    if random.random() < 0.5:
        delay_samples = int(sr * random.uniform(0.05, 0.2))
        if 0 < delay_samples < waveform.shape[1]:
            echo = 0.3 * waveform[:, :waveform.shape[1] - delay_samples]  # fresh tensor, no aliasing
            waveform[:, delay_samples:] += echo
return waveform
# --------------------------------------------------
# Preprocess & Predict
# --------------------------------------------------
def preprocess_audio(wave: np.ndarray, sr: int, augment=False) -> torch.Tensor:
if isinstance(wave, np.ndarray):
waveform = torch.from_numpy(wave).unsqueeze(0).float()
else:
waveform = wave
if sr != config.target_sr:
resampler = T.Resample(sr, config.target_sr)
waveform = resampler(waveform)
if waveform.shape[0] > 1:
waveform = torch.mean(waveform, dim=0, keepdim=True)
    max_samples = int(config.target_sr * config.chunk_len)
if waveform.shape[1] > max_samples:
waveform = waveform[:, :max_samples]
else:
waveform = nn.functional.pad(waveform, (0, max_samples - waveform.shape[1]))
if augment:
waveform = augment_audio(waveform, config.target_sr)
return waveform
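# --------------------------------------------------
# Shared mel features (added refactor, not in the original upload): predict()
# built its mel spectrogram inline, so the training loop below would otherwise
# feed raw waveforms straight into the 2-D CNN and crash. Routing both paths
# through one helper keeps train and inference features identical.
# --------------------------------------------------
_mel_transform = T.MelSpectrogram(sample_rate=config.target_sr, n_mels=config.n_mels,
                                  n_fft=config.n_fft, hop_length=config.hop_length)
_db_transform = T.AmplitudeToDB()
def wave_to_mel(waveform: torch.Tensor) -> torch.Tensor:
    """(1, samples) waveform -> normalized (1, n_mels, target_frames) log-mel."""
    mel = _db_transform(_mel_transform(waveform))
    mel = (mel - mel.mean()) / (mel.std() + 1e-6)
    frames = mel.shape[-1]
    if frames < config.target_frames:
        mel = nn.functional.pad(mel, (0, config.target_frames - frames))
    else:
        mel = mel[..., :config.target_frames]
    return mel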
def predict(waveform: torch.Tensor) -> Dict[str, float]:
    if model is None:
        probs = np.random.dirichlet(np.ones(len(SOUND_LABELS)))
        return {label: float(p) for label, p in zip(SOUND_LABELS, probs)}
    if waveform.dim() == 3:
        waveform = waveform.squeeze(0)  # Accept a (1, 1, samples) batched slice
    model.eval()
    with torch.no_grad():
        mel = wave_to_mel(waveform)
        logits = model(mel.unsqueeze(0))
        probs = torch.softmax(logits, dim=1).squeeze(0).tolist()
    model.train()
    return {label: prob for label, prob in zip(SOUND_LABELS, probs)}
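# Usage sketch: probs = predict(preprocess_audio(wave, sr)); top = max(probs, key=probs.get)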
# --------------------------------------------------
# LLM Micro-Neurons (3-9 Votes)
# --------------------------------------------------
def llm_micro_vote(prompt_base: str) -> float:
if llm_pipe is None:
return random.random()
num_votes = random.randint(config.micro_neurons_min, config.micro_neurons_max)
votes = []
for _ in range(num_votes):
flair = random.choice(["Cartman testify: Agree or roast?", "Echo check: Naughty score?", "Jury: Guilty of being epic?"])
mutated = f"{prompt_base} {flair}"
try:
out = llm_pipe(mutated, return_full_text=False)
resp = out[0]["generated_text"].lower()
vote = 1.0 if any(word in resp for word in ['agree', 'guilty', 'epic', 'roast approved']) else 0.5
votes.append(vote)
        except Exception:
votes.append(0.5)
return np.mean(votes)
def hybrid_agree(cnn_probs: Dict[str, float], true_label: str, desc: str = "") -> float:
pred_label = max(cnn_probs, key=cnn_probs.get)
cnn_conf = cnn_probs.get(true_label, 0.0)
prompt = f"Desc: {desc}. True: {true_label}, Pred: {pred_label} (conf {cnn_conf:.2f}). Vote 0-1?"
llm_agree = llm_micro_vote(prompt)
return 0.7 * cnn_conf + 0.3 * llm_agree # Weighted jury
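# Worked example: CNN confidence 0.9 on the true label with a mean jury vote of
# 0.6 scores 0.7 * 0.9 + 0.3 * 0.6 = 0.81.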
# --------------------------------------------------
# Incremental Training Loop w/ Replay
# --------------------------------------------------
def training_loop():
    global accuracy, mode, replay_buffer
    # Uses the module-level optimizer/criterion so the RNN toggle's rebuilt
    # optimizer (see the UI handler below) is the one that actually steps.
    print("[INFO] EvoFartNet online—replay armed.")
while training_active:
try:
# Batch: New synth + replay sample
batch_waves = []
batch_labels = []
# New
for _ in range(config.batch_size // 2):
label = random.choice(SOUND_LABELS)
wave, _ = generate_synthetic_sound(label)
aug_wave = preprocess_audio(wave, config.target_sr, augment=True).numpy().squeeze()
batch_waves.append(aug_wave)
batch_labels.append(SOUND_LABELS.index(label))
# Replay (anti-forgetting)
if replay_buffer:
for _ in range(config.batch_size // 2):
old = random.choice(replay_buffer)
batch_waves.append(old[0])
batch_labels.append(old[1])
            batch_tensor = torch.stack([wave_to_mel(preprocess_audio(w, config.target_sr)) for w in batch_waves])
labels_tensor = torch.tensor(batch_labels)
# Forward/Back
logits = model(batch_tensor)
loss = criterion(logits, labels_tensor)
preds = torch.argmax(logits, dim=1)
nn_acc = (preds == labels_tensor).float().mean().item()
            # Hybrid w/ micro-jury (sample up to 8 items; reuse the batch logits
            # instead of re-running predict on an already-mel tensor)
            hybrid_acc = 0.0
            for i in range(min(8, len(batch_waves))):
                desc = f"Aug {SOUND_LABELS[batch_labels[i]]} chunk."
                probs = {label: p for label, p in
                         zip(SOUND_LABELS, torch.softmax(logits[i].detach(), dim=0).tolist())}
                hybrid_acc += hybrid_agree(probs, SOUND_LABELS[batch_labels[i]], desc)
            hybrid_acc /= min(8, len(batch_waves))
acc = 0.8 * nn_acc + 0.2 * hybrid_acc
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Buffer mgmt
            # Buffer mgmt: retain only the freshly generated half of the batch
            for wave, lbl in zip(batch_waves[:config.batch_size // 2], batch_labels[:config.batch_size // 2]):
                replay_buffer.append((wave, lbl, time.time()))
                if len(replay_buffer) > config.replay_size:
                    replay_buffer.pop(0)
            # Walk-forward val: random slice of the replay buffer
            if len(replay_buffer) >= config.walk_window:
                val_idx = np.random.choice(len(replay_buffer), min(32, len(replay_buffer)), replace=False)
                val_batch = torch.stack([wave_to_mel(preprocess_audio(replay_buffer[i][0], config.target_sr))
                                         for i in val_idx])
                val_labels = torch.tensor([replay_buffer[i][1] for i in val_idx])
with torch.no_grad():
v_logits = model(val_batch)
v_preds = torch.argmax(v_logits, dim=1)
val_acc = (v_preds == val_labels).float().mean().item()
accuracy = val_acc
# Log & Save
log_record = {"loss": loss.item(), "acc": acc, "mode": mode, "ts": time.time()}
with open(config.log_file, "a") as f:
f.write(json.dumps(log_record) + "\n")
torch.save(model.state_dict(), config.model_path)
# Mode switch
if mode == "synth" and accuracy > config.flawless_acc:
mode = "real"
print("[INFO] Synth flawless—mic drop to real mode!")
time.sleep(0.1)
except Exception as e:
print(f"[ERROR] Loop: {e}")
time.sleep(1)
# Real Mic (Pseudo-Label Hybrid)
def audio_callback(indata, frames, time_info, status):
    # sounddevice callback: push each mono chunk into the queue as a flat numpy array
    if status:
        print(status)
    q.put(indata.copy().flatten())
def start_real_recording():
    global mic_stream
    if mode != "real" or not training_active:
        return
    # Keep the stream in a module global so it isn't garbage-collected
    mic_stream = sd.InputStream(samplerate=config.target_sr, channels=1, callback=audio_callback,
                                blocksize=int(config.target_sr * config.chunk_len))
    mic_stream.start()
    def process_queue():
        while training_active:
            try:
                chunk = q.get(timeout=1)  # already a flat numpy array from the callback
                waveform = preprocess_audio(chunk, config.target_sr)
                probs = predict(waveform)
                pred_label = max(probs, key=probs.get)
                # LLM pseudo-label vote
                desc = "Real-time mystery sound."
                pseudo_conf = hybrid_agree(probs, pred_label, desc)
                if pseudo_conf > 0.7:  # Confident enough to trust the pseudo-label
                    replay_buffer.append((chunk, SOUND_LABELS.index(pred_label), time.time()))
                    if len(replay_buffer) > config.replay_size:
                        replay_buffer.pop(0)
            except queue.Empty:
                pass
    threading.Thread(target=process_queue, daemon=True).start()
    print("[INFO] Mic live—pseudo-labeling chaos.")
# --------------------------------------------------
# Neuromorphic Neon Dark UI (Pulsing Acc)
# --------------------------------------------------
neuromorphic_css = """
body { background: #0a0a0a; color: #00ff88; font-family: 'Courier New', monospace; }
.gradio-container { background: linear-gradient(135deg, #0a0a0a, #1a1a1a); }
.card {
background: #1a1a1a; border-radius: 20px;
box-shadow: inset 4px 4px 8px rgba(0,0,0,0.3), -4px -4px 8px rgba(0,255,136,0.1);
padding: 20px; margin: 10px; border: none;
}
button {
background: #1a1a1a !important; color: #ff00ff !important; border-radius: 15px !important;
box-shadow: 2px 2px 5px rgba(0,0,0,0.5), -2px -2px 5px rgba(255,0,255,0.2) !important;
transition: all 0.3s;
}
button:hover {
box-shadow: inset 2px 2px 5px rgba(0,0,0,0.5), -2px -2px 5px rgba(255,0,255,0.4) !important;
color: #00ff88 !important;
}
.textbox { background: #0f0f0f; color: #00ff88; border: 1px solid #333; border-radius: 10px; }
@keyframes pulse { 0% { box-shadow: 0 0 0 0 rgba(0,255,136,0.7); } 70% { box-shadow: 0 0 0 10px rgba(0,255,136,0); } 100% { box-shadow: 0 0 0 0 rgba(0,255,136,0); } }
.acc-high { animation: pulse 2s infinite; background: linear-gradient(45deg, #ff00ff, #00ff88); }
"""
def start_training():
global training_active
if not training_active:
training_active = True
threading.Thread(target=training_loop, daemon=True).start()
if mode == "real":
start_real_recording()
diss = random.choice(config.cartman_disses)
return f"Evo started! Mode: {mode} | Acc: {accuracy:.3f} | {diss}"
return "Already evolving, ya nasty."
def stop_training():
global training_active
training_active = False
return "Evolution halted—replay buffer intact."
def get_status():
pulse_class = "acc-high" if accuracy > 0.95 else ""
diss = random.choice(config.cartman_disses)
return f'<div class="{pulse_class}">Status: {mode} | Acc: {accuracy:.3f} | {diss}</div>'
with gr.Blocks(css=neuromorphic_css, title="🧪 EvoFartNet: Neon Incremental Fartboard") as demo:
gr.Markdown("# 💨 EvoFartNet Dashboard: Replay-Powered Audio Apocalypse")
gr.Markdown("Incremental CNN/RNN + LLM Jury | Synth Augs → Real Pseudo | Neon Pulse UX")
with gr.Row():
with gr.Column():
start_btn = gr.Button("🚀 Ignite Evolution", variant="primary")
stop_btn = gr.Button("⏹️ Abort Mission")
status = gr.HTML(label="Live Status", value=get_status())
with gr.Column():
toggle_rnn = gr.Checkbox(label="RNN Mode (Echoes)", value=False)
acc_log = gr.Textbox(label="Acc Evolution Log", lines=5, value="Pre-roast warmup...")
start_btn.click(start_training, outputs=status)
stop_btn.click(stop_training, outputs=status)
    def on_toggle_rnn(use_rnn):
        # Rebuild model and optimizer together so the optimizer tracks the new params
        global optimizer, criterion
        optimizer, criterion = load_or_init_model(use_rnn=use_rnn)
    toggle_rnn.change(on_toggle_rnn, inputs=toggle_rnn)
demo.load(get_status, outputs=status)
if __name__ == "__main__":
port = int(os.getenv("GRADIO_PORT", 7860))
share = os.getenv("GRADIO_SHARE", "False").lower() == "true"
server_name = "0.0.0.0" if os.getenv("EXPOSE_ALL", "false").lower() == "true" else "127.0.0.1"
port_found = False
for p in range(port, port + 10):
try:
demo.launch(share=share, server_name=server_name, server_port=p)
port_found = True
break
        except OSError as e:
            if "port" in str(e).lower():
                print(f"[WARN] Port {p} is in use. Trying the next one.")
                continue
            raise
if not port_found:
print("[ERROR] Could not find an available port for Gradio after several attempts.")