import gradio as gr
import torch
import os
import re
import json
import spaces
from transformers import AutoProcessor, CohereAsrForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
from transformers.audio_utils import load_audio

# -------------------------------------------------------------------------
# NATIVE ZERO-GPU MODEL INITIALIZATION
# -------------------------------------------------------------------------
# Models are initialized at the module level using .to("cuda") so ZeroGPU can handle fast-state caching.
# --- Speech-to-text stage ---
# Everything in this section runs once, at import time, not per request.
print("--> Initializing Cohere Transcribe (2B Parameter Layer)...")
asr_id = "CohereLabs/cohere-transcribe-03-2026"  # checkpoint id handed to from_pretrained below
# run_pipeline calls this processor to build model inputs from audio and to decode generated ids.
asr_processor = AutoProcessor.from_pretrained(asr_id)
# Weights are requested in float16; run_pipeline reads asr_model.dtype when casting its inputs.
asr_model = CohereAsrForConditionalGeneration.from_pretrained(asr_id, torch_dtype=torch.float16)
asr_model.to("cuda")

# --- Text-generation stage ---
print("--> Initializing Tiny Aya Earth (3.35B Parameter Layer)...")
llm_id = "CohereLabs/tiny-aya-earth"  # checkpoint id shared by the tokenizer and the model
# run_pipeline uses this tokenizer for chat-template formatting and for decoding the response.
llm_tokenizer = AutoTokenizer.from_pretrained(llm_id)
llm_model = AutoModelForCausalLM.from_pretrained(llm_id, torch_dtype=torch.float16)
llm_model.to("cuda")

# -------------------------------------------------------------------------
# TEXT PROCESSING & REGEX CLEANING
# -------------------------------------------------------------------------
def clean_and_parse_json(raw_text):
    """
    Extract the JSON object from raw LLM output and return it as a dict.

    ``<think>...</think>`` sections are removed first. The remaining text is then
    tried in several forms, from strictest to loosest, and the first form that
    parses to a JSON *object* wins:

    1. the whole text;
    2. the content of the first fenced block (``json`` tag optional, any case);
    3. the span from the first opening fence to the last closing fence, which
       keeps payloads intact when a string value itself contains a fence;
    4. the span from the first ``{`` to the last ``}`` (JSON wrapped in prose).

    Args:
        raw_text: Decoded model output. Non-string values are tolerated.

    Returns:
        The parsed dict. If nothing parses to a dict, a fallback dict with the
        keys ``summary``, ``tasks`` and ``code`` is returned, where ``code``
        carries ``raw_text`` unchanged so the caller can still display it.
    """
    fallback = {
        "summary": "Direct extraction parsing failed. Review raw output block in Code tab.",
        "tasks": [["Review raw compilation logs", "High", "Action Required"]],
        "code": raw_text
    }
    if not isinstance(raw_text, str):
        return fallback

    cleaned = re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL).strip()

    candidates = [cleaned]
    # Non-greedy first (handles several independent fenced blocks), then greedy
    # (handles a fence nested inside a JSON string value).
    for fence_pattern in (r"```(?:json)?\s*(.*?)\s*```", r"```(?:json)?\s*(.*)```"):
        fence_match = re.search(fence_pattern, cleaned, flags=re.DOTALL | re.IGNORECASE)
        if fence_match:
            candidates.append(fence_match.group(1).strip())
    first_brace, last_brace = cleaned.find("{"), cleaned.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(cleaned[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # JSONDecodeError is a ValueError; RecursionError covers absurdly deep nesting.
            continue
        # The caller reads the result with .get(), so only a JSON object is acceptable.
        if isinstance(parsed, dict):
            return parsed
    return fallback

# -------------------------------------------------------------------------
# PIPELINE ORCHESTRATION (RUNS EXCLUSIVELY ON ZERO-GPU RESOURCING)
# -------------------------------------------------------------------------
def _as_text(value, default):
    """Return *value* as display text: strings pass through, None becomes *default*, anything else is JSON-dumped."""
    if value is None:
        return default
    if isinstance(value, str):
        return value
    return json.dumps(value, ensure_ascii=False, indent=2, default=str)


def _coerce_task_rows(tasks):
    """Shape arbitrary LLM 'tasks' output into rows of exactly three strings for the task Dataframe."""
    if not isinstance(tasks, list):
        return []
    rows = []
    for item in tasks:
        if isinstance(item, dict):
            cells = list(item.values())
        elif isinstance(item, (list, tuple)):
            cells = list(item)
        else:
            cells = [item]
        cells = [str(cell) for cell in cells[:3]]
        cells += [""] * (3 - len(cells))  # pad short rows so every row has three columns
        rows.append(cells)
    return rows


@spaces.GPU(duration=90)
def run_pipeline(audio_path, language_code, workflow_type, extra_instructions):
    """
    Transcribe an audio file, ask the LLM for a structured spec, and export a markdown file.

    Args:
        audio_path: Path of the recorded/uploaded audio file; falsy means no audio.
        language_code: Language code forwarded to the ASR processor.
        workflow_type: Workflow label inserted into the LLM user message.
        extra_instructions: Free-text modifiers inserted into the LLM user message.

    Returns:
        A 4-tuple ``(summary, tasks, code_block, output_path)``. ``tasks`` is a list of
        three-string rows. On any failure the first element carries an error message,
        ``tasks`` is empty and ``output_path`` is ``None``.
    """
    if not audio_path:
        return "Error: Empty audio track received.", [], "No execution context.", None

    # Step 1: Native Speech-to-Text Processing
    try:
        audio = load_audio(audio_path, sampling_rate=16000)
        asr_inputs = asr_processor(audio, sampling_rate=16000, return_tensors="pt", language=language_code)
        # Move every tensor to CUDA, but cast only floating-point tensors to the model
        # dtype: integer tensors (ids, masks, lengths) must keep their integer dtype.
        device_inputs = {}
        for key, value in asr_inputs.items():
            if not torch.is_tensor(value):
                device_inputs[key] = value
            elif value.is_floating_point():
                device_inputs[key] = value.to("cuda", dtype=asr_model.dtype)
            else:
                device_inputs[key] = value.to("cuda")

        with torch.no_grad():
            asr_outputs = asr_model.generate(**device_inputs, max_new_tokens=256)
        transcript = asr_processor.decode(asr_outputs, skip_special_tokens=True)
        # NOTE(review): decode may hand back one string per batch item — flatten so the
        # prompt and the export contain plain text rather than a list repr. Confirm.
        if isinstance(transcript, (list, tuple)):
            transcript = " ".join(str(part) for part in transcript)
    except Exception as e:
        # Without a transcript there is nothing meaningful for the LLM to analyse, so
        # report the failure instead of generating a spec from an error message.
        return f"ASR Layer Critical Failure: {str(e)}", [], f"```python\n# Execution Trace\n{str(e)}\n```", None

    # Step 2: System Architecture Payload Formatting
    system_prompt = (
        "You are an authoritative backend systems architect. Analyze the provided context "
        "and output a strict JSON object with exactly three keys: 'summary' (string), 'tasks' (list of lists "
        "where each item is [Task Name, Priority Low/Medium/High, Status Context]), and 'code' (clean markdown script or schema block).\n"
        "Do not include conversational filler. Return ONLY valid JSON."
    )
    user_content = f"Workflow Class: {workflow_type}\nContext: {transcript}\nModifiers: {extra_instructions}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content}
    ]

    try:
        inputs = llm_tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt"
        ).to("cuda")

        with torch.no_grad():
            outputs = llm_model.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.1,
                top_p=0.95
            )

        # Drop the prompt tokens so that only the newly generated reply is decoded.
        response_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
        raw_output = llm_tokenizer.decode(response_tokens, skip_special_tokens=True)

        # Breakdown raw text arrays into programmatic components
        parsed_data = clean_and_parse_json(raw_output)
        if not isinstance(parsed_data, dict):
            # Valid JSON that is not an object (e.g. a bare list): surface the raw reply.
            parsed_data = {"code": raw_output}
        summary = _as_text(parsed_data.get("summary"), "No summary processed.")
        tasks = _coerce_task_rows(parsed_data.get("tasks", []))
        code_block = _as_text(parsed_data.get("code"), "# No artifacts compiled.")

        # Write asset data out to local disk space for immediate client download.
        # Explicit UTF-8: transcripts may be non-ASCII and the locale default may not be.
        output_filename = "fone_architecture_spec.md"
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(f"# FONE SPECIFICATION\n\n## Audio Transcript\n{transcript}\n\n## Scope Summary\n{summary}")

        return summary, tasks, code_block, output_filename
    except Exception as e:
        return f"LLM Generation Failure: {str(e)}", [], f"```python\n# Execution Trace\n{str(e)}\n```", None

# -------------------------------------------------------------------------
# INTERFACE ORCHESTRATION (GRADIO 6 BLOCK COMPLIANT)
# -------------------------------------------------------------------------
# Two-column layout: inputs and the file export on the left, tabbed results on the right.
# Components are created in display order, so statement order here defines the layout.
with gr.Blocks(title="fone // Sovereign Workspace") as demo:
    gr.Markdown("## 🎛️ fone // Voice Architecture Pipeline")
    gr.Markdown("*Decentralized hardware orchestration running on native containerized ZeroGPU frames.*")
    
    with gr.Row():
        # Left column (scale=1): everything the user provides, plus the download slot.
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Input Core")
            # type="filepath": the handler receives a path string, which run_pipeline passes to load_audio.
            audio_feed = gr.Audio(type="filepath", label="Voice Master Input")
            
            with gr.Row():
                # Value is forwarded as the `language` argument of the ASR processor call.
                lang_selector = gr.Dropdown(choices=["en", "fr", "es", "de", "ar", "ja"], value="en", label="Input Language")
                # Value is inserted into the LLM user message as "Workflow Class".
                workflow_selector = gr.Dropdown(
                    choices=["Feature Engineering Specification", "Database Schema Map", "Automated System Scripts"], 
                    value="Feature Engineering Specification", 
                    label="Routing Class"
                )
                
            # Free text inserted into the LLM user message as "Modifiers".
            instruction_overlay = gr.Textbox(label="Execution Modifiers", placeholder="e.g., Target strict Tailwind configurations...")
            trigger_btn = gr.Button("Execute Pipeline Trace", variant="primary")
            # Receives the path of the markdown file written by run_pipeline (or None on failure).
            file_download = gr.File(label="Exported System Artifacts")

        # Right column (scale=2): one tab per element of the pipeline result.
        with gr.Column(scale=2):
            gr.Markdown("### 📤 Orchestration Hub")
            with gr.Tabs():
                with gr.TabItem("System Summary"):
                    summary_display = gr.Textbox(label="Extracted Scope", lines=8, interactive=False)
                with gr.TabItem("Task Allocation Matrix"):
                    # Three string columns matching the [name, priority, status] rows requested in the system prompt.
                    task_matrix = gr.Dataframe(
                        headers=["Objective / Component", "Priority Rank", "Status Context"],
                        datatype=["str", "str", "str"],
                        row_count=5,
                        column_count=(3, "fixed")  # column_count replaces the deprecated col_count (Gradio 6)
                    )
                with gr.TabItem("Code & Schema Artifacts"):
                    code_display = gr.Code(language="markdown", label="Isolated Scripts", lines=15)

    # The order of `outputs` must match the 4-tuple returned by run_pipeline:
    # (summary, tasks, code_block, output file path).
    trigger_btn.click(
        fn=run_pipeline,
        inputs=[audio_feed, lang_selector, workflow_selector, instruction_overlay],
        outputs=[summary_display, task_matrix, code_display, file_download]
    )

# Launch the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(theme=gr.themes.Monochrome())