# Faaart / app.py — Hugging Face Space application file
# Uploaded by luguog — commit 8010b51 (verified): "Create app.py"
import os
import sys
import json
import time
import tempfile
import traceback
import threading
from typing import Optional, Dict, Any, List
from pathlib import Path
from collections import deque
import numpy as np
import soundfile as sf
import librosa
import torch
import gradio as gr
from scipy.io.wavfile import write as write_wav
from midiutil import MIDIFile
from litellm import completion
from web3 import Web3, HTTPProvider
from web3.middleware import geth_poa_middleware
from pydantic import BaseSettings, Field
import requests
import io
import base64
# ============================================================================
# CONFIGURATION
# ============================================================================
class Config(BaseSettings):
    """Application configuration for Colab/HF.

    NOTE(review): this uses pydantic v1-style ``BaseSettings`` (inner
    ``Config`` class, ``env=`` on ``Field``); under pydantic v2 the class
    moved to the separate ``pydantic-settings`` package — confirm the
    installed pydantic version before deploying.
    """
    # Audio parameters (for analysis): target sample rate in Hz.
    SR: int = Field(16000, env="SR")
    # Cartman LLM Configuration (3 Real LLM Calls Defined)
    # NOTE: Set these environment variables in Colab secrets or HF Space secrets!
    CARTMAN_MODEL_1: str = Field("groq/llama-3.1-8b-instant", env="CARTMAN_MODEL_1")
    CARTMAN_MODEL_2: str = Field("openai/gpt-3.5-turbo", env="CARTMAN_MODEL_2")
    CARTMAN_MODEL_3: str = Field("perplexity/llama-3-8b-instruct", env="CARTMAN_MODEL_3")
    # Sampling temperature shared by all three LLM calls.
    CARTMAN_TEMPERATURE: float = Field(0.9, env="CARTMAN_TEMPERATURE")
    # Blockchain (Optional, set to None for safe run)
    RPC_URL: Optional[str] = Field(None, env="RPC_URL")
    PRIVATE_KEY: Optional[str] = Field(None, env="PRIVATE_KEY")
    NFT_CONTRACT_ADDRESS: Optional[str] = Field(None, env="NFT_CONTRACT_ADDRESS")

    class Config:
        # pydantic v1 settings convention: also read values from a local .env file.
        env_file = ".env"

# Module-level singleton settings instance used throughout the app.
cfg = Config()
# ============================================================================
# ML AND LLM ABSTRACTIONS
# ============================================================================
class ModelHandler:
    """Handles the (mock) ML model for detection and audio feature extraction."""

    def __init__(self, sr: int):
        """
        Args:
            sr: Sample rate (Hz) the handler assumes for incoming audio.
        """
        self.sr = sr
        self.target_length = sr  # one second of samples at the configured rate
        # NOTE: Dummy CNN initialized - training or weights would be needed for real detection
        self.model = torch.nn.Identity()  # Simplest possible mock
        self.model.eval()

    def extract_audio_features(self, audio_data: np.ndarray) -> Dict[str, Any]:
        """Extract comprehensive audio features.

        Args:
            audio_data: 1-D (or multi-channel 2-D) waveform; integer PCM is
                normalized to [-1, 1] automatically.

        Returns:
            Dict with rms_mean, zcr_mean, duration, spectral_centroid_mean
            and spectral_bandwidth_mean; all zeros for empty input.
        """
        # .size (not len) so zero-sample multi-channel arrays are caught too.
        if audio_data.size == 0:
            return {
                'rms_mean': 0.0, 'zcr_mean': 0.0, 'duration': 0.0,
                'spectral_centroid_mean': 0.0, 'spectral_bandwidth_mean': 0.0
            }
        # Ensure mono and correct sample rate for Librosa
        if audio_data.ndim > 1:
            audio_data = audio_data.mean(axis=1)
        # BUG FIX: librosa requires float audio in [-1, 1]. Integer PCM
        # (e.g. int16 from gr.Audio) would either raise or produce
        # wildly scaled RMS values, so normalize it here.
        if np.issubdtype(audio_data.dtype, np.integer):
            audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
        else:
            audio_data = audio_data.astype(np.float32)
        features = {
            'rms_mean': float(np.sqrt(np.mean(audio_data**2))),
            'zcr_mean': float(librosa.feature.zero_crossing_rate(audio_data)[0].mean()),
            'duration': len(audio_data) / self.sr
        }
        # Single STFT shared by both spectral features (avoids recomputing it).
        S = np.abs(librosa.stft(audio_data))
        features.update({
            'spectral_centroid_mean': float(librosa.feature.spectral_centroid(S=S, sr=self.sr)[0].mean()),
            'spectral_bandwidth_mean': float(librosa.feature.spectral_bandwidth(S=S, sr=self.sr)[0].mean()),
        })
        return features

    def predict_fart(self, audio_data: np.ndarray) -> float:
        """Mock prediction: confidence heuristic from energy and duration.

        Returns:
            A confidence score clamped to [0.0, 0.99].
        """
        features = self.extract_audio_features(audio_data)
        # Placeholder logic based on features; clamp defensively to [0, 0.99].
        confidence = min(features['rms_mean'] * 5 + features['duration'] * 0.1, 0.99)
        return max(0.0, confidence)
class CartmanCommentary:
    """South Park style commentary generator using 3 LLM providers/models."""

    @staticmethod
    def _generate(model_name: str, prompt: str) -> str:
        """Internal helper for LLM completion with basic fallback.

        Any failure (missing API key, network error) is swallowed and a
        canned in-character error string is returned instead of raising.
        """
        fallback_msg = "Error: LLM API Key missing or connection failed. Respect my authoritah!"
        try:
            response = completion(
                model=model_name,
                messages=[{"role": "user", "content": prompt}],
                temperature=cfg.CARTMAN_TEMPERATURE,
                max_tokens=80
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            # IMPORTANT: For Colab/HF, this catches missing API keys
            print(f"LLM API Error for {model_name}: {e}")
            return fallback_msg

    @staticmethod
    def _get_prompt(features: Dict[str, Any]) -> str:
        """Generate the persona-neutral base prompt shared by all three calls.

        BUG FIX: this prompt previously hardcoded "You are Eric Cartman",
        which contradicted the Stan/Kyle persona prefixes prepended in
        generate_all_commentaries. The persona now comes solely from the
        caller's prefix.
        """
        return f"""
Respond in character to this detected flatulent event:
Event Metrics:
- Confidence (Simulated): {features.get('confidence', 0):.2f}
- Duration: {features.get('duration', 0):.2f}s
- Energy (RMS): {features.get('rms_mean', 0):.4f}
- Spectral Quality: {features.get('spectral_centroid_mean', 0):.1f}Hz
Make your response:
- 1-2 sentences maximum
- Hilarious and fully in character
- Use South Park style language
- NO markdown, just plain text, do not use quotes.
"""

    @staticmethod
    def generate_all_commentaries(features: Dict[str, Any]) -> Dict[str, str]:
        """Generates commentary from Cartman, Stan, and Kyle (3 LLM calls)."""
        prompt = CartmanCommentary._get_prompt(features)
        # 1. Cartman (Primary) — persona description lives here now,
        # not in the shared base prompt.
        cartman_prompt = f"You are Eric Cartman from South Park - loud, arrogant, selfish, and hilarious. {prompt}"
        cartman_commentary = CartmanCommentary._generate(cfg.CARTMAN_MODEL_1, cartman_prompt)
        # 2. Stan (Annoyed Response)
        stan_prompt = f"You are Stan Marsh. Act annoyed and comment on Cartman's fart. {prompt}"
        stan_commentary = CartmanCommentary._generate(cfg.CARTMAN_MODEL_2, stan_prompt)
        # 3. Kyle (Insulting Response)
        kyle_prompt = f"You are Kyle Broflovski. Insult Cartman and the fart. {prompt}"
        kyle_commentary = CartmanCommentary._generate(cfg.CARTMAN_MODEL_3, kyle_prompt)
        return {
            'cartman': cartman_commentary,
            'stan': stan_commentary,
            'kyle': kyle_commentary
        }
# ============================================================================
# BLOCKCHAIN ABSTRACTION (Refactored for controlled button activation)
# ============================================================================
class Web3Minter:
    """Handles NFT contract interaction.

    Configuration is optional: if any of RPC_URL / PRIVATE_KEY /
    NFT_CONTRACT_ADDRESS is missing, ``self.configured`` stays False and
    minting is refused with an error string rather than raising.
    """
    def __init__(self, config: Config):
        # Flips to True only if all three Web3 settings are present AND
        # the RPC connection + account/address setup succeed.
        self.configured = False
        if config.RPC_URL and config.PRIVATE_KEY and config.NFT_CONTRACT_ADDRESS:
            try:
                self.web3 = Web3(HTTPProvider(config.RPC_URL))
                if not self.web3.is_connected():
                    raise ConnectionError("Web3 RPC connection failed.")
                # NOTE(review): detecting a PoA chain by substring-matching
                # the RPC URL is a heuristic — many PoA endpoints will not
                # contain "poa" in their URL. Confirm per target chain.
                if 'poa' in config.RPC_URL.lower():
                    self.web3.middleware_onion.inject(geth_poa_middleware, layer=0)
                self.account = self.web3.eth.account.from_key(config.PRIVATE_KEY)
                self.contract_address = self.web3.to_checksum_address(config.NFT_CONTRACT_ADDRESS)
                self.configured = True
                print("Web3 initialized. Minting is enabled.")
            except Exception as e:
                # Any setup failure (bad key, unreachable RPC) disables
                # minting rather than crashing the whole app.
                print(f"Web3 initialization error: {e}. Minting disabled.")

    def mint_fart_nft(self, features: Dict[str, Any], token_uri: str):
        """NON-MOCKED: Placeholder for transaction logic.

        Returns an error string when unconfigured; otherwise raises
        NotImplementedError until real transaction code is supplied.
        """
        if not self.configured:
            return f"❌ Error: Web3 is not fully configured with RPC URL, Private Key, and Contract Address."
        # --- Transaction Logic Placeholder ---
        # NOTE: Replace this line with your actual web3 transaction logic.
        raise NotImplementedError(
            "Web3 Minting requires the contract ABI and transaction logic to be completed. "
            "Please complete the transaction steps inside Web3Minter.mint_fart_nft."
        )
# ============================================================================
# ANALYSIS PIPELINE
# ============================================================================
def analyze_and_compose(audio_tuple: tuple[int, np.ndarray]):
    """Main pipeline for analysis, LLM, MIDI, and NFT preparation.

    Args:
        audio_tuple: (sample_rate, samples) pair from gr.Audio(type="numpy"),
            or None when no audio was supplied.

    Returns:
        An 8-tuple of (status message, feature summary dict, cartman text,
        stan text, kyle text, MIDI download HTML, MIDI file path str,
        features JSON str). On failure every slot except the status
        message is None.
    """
    if audio_tuple is None:
        return "Please upload or record an audio file.", None, None, None, None, None, None, None
    sr, audio_arr = audio_tuple
    # Check if audio is valid
    if audio_arr.size == 0:
        return "Audio input is empty.", None, None, None, None, None, None, None
    # Ensure mono before any DSP
    if audio_arr.ndim > 1:
        audio_arr = audio_arr.mean(axis=1)
    # BUG FIX: gr.Audio delivers integer PCM (typically int16), but
    # librosa.resample and the feature extractors expect float audio in
    # [-1, 1]. Previously the float cast happened AFTER resampling and
    # without normalization, so resampling could fail and RMS was computed
    # on +/-32767-scale samples (pinning confidence at its cap).
    if np.issubdtype(audio_arr.dtype, np.integer):
        audio_arr = audio_arr.astype(np.float32) / np.iinfo(audio_arr.dtype).max
    else:
        audio_arr = audio_arr.astype(np.float32)
    # Resample if needed (to match Librosa standard or model expectation)
    if sr != cfg.SR:
        audio_arr = librosa.resample(audio_arr, orig_sr=sr, target_sr=cfg.SR)
        sr = cfg.SR
    try:
        handler = ModelHandler(sr=sr)
        # 1. Feature Extraction & Confidence
        features = handler.extract_audio_features(audio_arr)
        confidence = handler.predict_fart(audio_arr)
        features['confidence'] = confidence
        # BUG FIX: include the analysis ID in the features payload itself —
        # handle_mint_nft reads features['ID'] from this JSON, but the ID
        # previously existed only in the display-only feat_output dict.
        analysis_id = int(time.time())
        features['ID'] = analysis_id
        # 2. LLM Commentary (3 REAL API CALLS)
        commentary = CartmanCommentary.generate_all_commentaries(features)
        # 3. MIDI Generation
        midi_path = Path(tempfile.gettempdir()) / f"fart_symphony_{analysis_id}.mid"
        midi_file = MIDIFile(1)
        midi_file.addTempo(0, 0, 120)
        # Use simple pitch mapping based on energy (base note C4 = 60,
        # shifted up to 30 semitones by confidence).
        pitch_base = int(60 + confidence * 30)
        midi_file.addNote(0, 0, pitch_base, 0, features['duration'], 100)
        midi_file.addNote(0, 0, pitch_base + 7, 0.1, features['duration'] * 0.5, 90)  # Simple 5th harmony
        with open(midi_path, "wb") as f:
            midi_file.writeFile(f)
        # 4. Prepare Outputs: embed the MIDI as a base64 data-URI download link
        midi_b64 = base64.b64encode(midi_path.read_bytes()).decode("utf-8")
        midi_download_html = f'''
        <a download="{midi_path.name}" href="data:audio/midi;base64,{midi_b64}" target="_blank">
        <button class="gr-button gr-button-lg gr-button-secondary">
        🎹 Download MIDI Symphony
        </button>
        </a>
        '''
        # Prepare formatted feature output for the UI JSON panel
        feat_output = {
            "ID": analysis_id,
            "Confidence": f"{confidence:.2f}",
            "Duration (s)": f"{features['duration']:.2f}",
            "Energy (RMS)": f"{features['rms_mean']:.4f}",
            "Spectral Centroid (Hz)": f"{features['spectral_centroid_mean']:.1f}",
        }
        return (
            "βœ… Analysis Complete! See results below.",
            feat_output,
            commentary['cartman'],
            commentary['stan'],
            commentary['kyle'],
            midi_download_html,
            str(midi_path),  # Pass path for minting
            json.dumps(features)  # Pass all features for minting
        )
    except Exception as e:
        error_msg = f"An error occurred during processing: {e}\n{traceback.format_exc()}"
        return error_msg, None, None, None, None, None, None, None
# ============================================================================
# GRADIO UI
# ============================================================================
def handle_mint_nft(midi_path_str: str, features_json: str):
    """Button handler for NFT minting (calls Web3 Minter).

    Args:
        midi_path_str: Path to the generated MIDI file (from analysis state).
        features_json: JSON-encoded feature dict (from analysis state).

    Returns:
        A human-readable status string; this handler never raises.
    """
    minter = Web3Minter(cfg)
    if not minter.configured:
        return "Minting Failed: Web3 components (RPC, Key, Address) are NOT configured in the environment."
    if not features_json:
        # The mint button was pressed before any analysis produced features.
        return "Minting Failed: run an analysis first so there are features to mint."
    try:
        features = json.loads(features_json)
        # BUG FIX: the features payload may lack an 'ID' key (older pipeline
        # versions only put the ID in the display dict), so fall back to a
        # timestamp instead of crashing with a KeyError.
        token_id = features.get('ID', int(time.time()))
        # Mocking the token URI since we don't have a real metadata endpoint
        token_uri = f"https://mock.fartlab.io/metadata/{token_id}"
        # This will raise NotImplementedError if logic isn't finished
        mint_status = minter.mint_fart_nft(features, token_uri)
        return f"Minting Attempted! Status: {mint_status}. Requires transaction logic completion."
    except NotImplementedError as e:
        return f"❌ Minting Failed: {e}"
    except Exception as e:
        return f"❌ Critical Minting Error: {e}"
def create_gradio_app():
    """Builds the Gradio interface.

    Returns:
        The assembled gr.Blocks application (call .launch() on it).
    """
    with gr.Blocks(theme=gr.themes.Soft(), title="🎡 Cartman Fart Research Lab") as demo:
        # BUG FIX: gr.State (like any component) must be instantiated inside
        # the Blocks context; the two states were previously created before
        # `with gr.Blocks(...)`, leaving them unattached to the app.
        midi_path_state = gr.State(None)
        features_state = gr.State(None)
        gr.Markdown("# 🎡 Cartman Fart Research Lab (Colab/HF Optimized)")
        gr.Markdown("Upload or record audio for analysis. **This version uses real-time API calls to three different LLMs for commentary!**")
        with gr.Row():
            audio_input = gr.Audio(
                label="πŸŽ™οΈ Upload or Record Audio",
                type="numpy",
                sources=["microphone", "upload"]
            )
        process_btn = gr.Button("πŸ” Analyze & Compose Fart", variant="primary")
        status_output = gr.Textbox(label="Status", interactive=False)
        # --- RESULTS TAB ---
        with gr.Tab("πŸ“Š Analysis & Commentary"):
            with gr.Row():
                feature_output = gr.JSON(label="Fart Feature Metrics", interactive=False, min_width=350)
                midi_download_html = gr.HTML(label="Symphony Download", min_width=350)
            gr.Markdown("## πŸ—£οΈ Multi-LLM Commentary")
            cartman_out = gr.Textbox(label="Cartman's Arrogant Review (Model 1)", lines=2, interactive=False)
            stan_out = gr.Textbox(label="Stan's Annoyed Response (Model 2)", lines=2, interactive=False)
            kyle_out = gr.Textbox(label="Kyle's Insult (Model 3)", lines=2, interactive=False)
        # --- NFT TAB ---
        with gr.Tab("πŸ” NFT Minting (Requires Config)"):
            mint_btn = gr.Button("πŸ”¨ Mint Fart NFT (Requires API Keys/ABI)", variant="secondary")
            mint_status = gr.Textbox(label="NFT Minting Status", interactive=False, placeholder="Press the button after analysis to attempt minting...")
        # --- EVENT HANDLERS ---
        # analyze_and_compose returns 8 values; the last two are routed into
        # the State components so the mint button can read them later.
        process_btn.click(
            fn=analyze_and_compose,
            inputs=[audio_input],
            outputs=[
                status_output,
                feature_output,
                cartman_out,
                stan_out,
                kyle_out,
                midi_download_html,
                midi_path_state,  # Save to state
                features_state    # Save to state
            ]
        )
        mint_btn.click(
            fn=handle_mint_nft,
            inputs=[midi_path_state, features_state],
            outputs=[mint_status]
        )
        gr.Markdown("""
---
**Configuration Notes:**
1. **LLMs:** To avoid "Error: LLM API Key missing," you **must** set `GROQ_API_KEY`, `OPENAI_API_KEY`, and `PERPLEXITY_API_KEY` in your environment (Colab Secrets or HF Space Secrets).
2. **NFT Minting:** Minting is disabled by default. Set `RPC_URL`, `PRIVATE_KEY`, and `NFT_CONTRACT_ADDRESS` to enable the button. You still need to complete the blockchain transaction logic in `Web3Minter.mint_fart_nft`.
""")
    return demo
if __name__ == "__main__":
    # Build the interface first, then serve it.
    app = create_gradio_app()
    print("🎡 Starting Cartman Fart Research Lab for Colab/HF...")
    # Colab and HF Spaces typically use the default 7860 port;
    # share=True enables the public tunnel link needed for Colab access.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
    )