"""fone: voice-to-specification pipeline served through a Gradio interface.

An audio recording is transcribed by a speech-recognition model, the
transcript is handed to a chat LLM that is asked for a JSON payload with the
keys ``summary``, ``tasks`` and ``code``, and the result is rendered in the UI
and exported as a Markdown file.
"""

import json
import os
import re
import tempfile

import gradio as gr
import spaces
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoProcessor,
    AutoTokenizer,
    CohereAsrForConditionalGeneration,
)
from transformers.audio_utils import load_audio

# -------------------------------------------------------------------------
# NATIVE ZERO-GPU MODEL INITIALIZATION
# -------------------------------------------------------------------------
# Models are initialized at the module level using .to("cuda") so ZeroGPU can handle fast-state caching.
print("--> Initializing Cohere Transcribe (2B Parameter Layer)...")
asr_id = "CohereLabs/cohere-transcribe-03-2026"
asr_processor = AutoProcessor.from_pretrained(asr_id)
asr_model = CohereAsrForConditionalGeneration.from_pretrained(asr_id, torch_dtype=torch.float16)
asr_model.to("cuda")

print("--> Initializing Tiny Aya Earth (3.35B Parameter Layer)...")
llm_id = "CohereLabs/tiny-aya-earth"
llm_tokenizer = AutoTokenizer.from_pretrained(llm_id)
llm_model = AutoModelForCausalLM.from_pretrained(llm_id, torch_dtype=torch.float16)
llm_model.to("cuda")

# -------------------------------------------------------------------------
# TEXT PROCESSING & REGEX CLEANING
# -------------------------------------------------------------------------
# Matches a Markdown fence with an optional "json" language tag.
_FENCED_BLOCK = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)

# Number of columns expected by the task table in the UI.
_TASK_COLUMNS = 3


def clean_and_parse_json(raw_text):
    """Extract the JSON object from raw LLM output.

    Several candidate substrings are tried in order and the first one that
    parses to a JSON object (``dict``) wins:

    1. the whole text,
    2. the contents of the first Markdown code fence,
    3. the span from the first ``{`` to the last ``}``.

    Candidate 3 matters when the payload's ``code`` value itself contains a
    Markdown fence: the lazy fence regex then stops at that inner fence and
    yields a truncated, unparseable fragment.

    Args:
        raw_text: Text produced by the LLM.

    Returns:
        The parsed ``dict``. If nothing parses to a dict, a fallback dict with
        the keys ``summary``, ``tasks`` and ``code`` is returned, where
        ``code`` carries ``raw_text`` unchanged so it can be inspected.
    """
    text = raw_text if isinstance(raw_text, str) else ""

    candidates = [text]
    fenced = _FENCED_BLOCK.search(text)
    if fenced:
        candidates.append(fenced.group(1))
    first_brace, last_brace = text.find("{"), text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(text[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate.strip())
        except (ValueError, RecursionError):
            continue
        # Callers use .get(), so only a JSON object is an acceptable result.
        if isinstance(parsed, dict):
            return parsed

    return {
        "summary": "Direct extraction parsing failed. Review raw output block in Code tab.",
        "tasks": [["Review raw compilation logs", "High", "Action Required"]],
        "code": raw_text,
    }


def _as_text(value, default):
    """Return *value* as a string; ``None`` becomes *default*, containers become JSON."""
    if value is None:
        return default
    if isinstance(value, str):
        return value
    try:
        return json.dumps(value, ensure_ascii=False, indent=2)
    except (TypeError, ValueError):
        return str(value)


def _normalize_tasks(tasks):
    """Coerce the LLM's ``tasks`` value into rows of exactly three strings."""
    if not isinstance(tasks, (list, tuple)):
        return []

    rows = []
    for item in tasks:
        if isinstance(item, dict):
            cells = list(item.values())
        elif isinstance(item, (list, tuple)):
            cells = list(item)
        else:
            cells = [item]
        cells = ["" if cell is None else str(cell) for cell in cells[:_TASK_COLUMNS]]
        cells.extend([""] * (_TASK_COLUMNS - len(cells)))
        rows.append(cells)
    return rows


# -------------------------------------------------------------------------
# PIPELINE ORCHESTRATION (RUNS EXCLUSIVELY ON ZERO-GPU RESOURCING)
# -------------------------------------------------------------------------
def _transcribe(audio_path, language_code):
    """Run the ASR model on *audio_path* and return the transcript as one string."""
    audio = load_audio(audio_path, sampling_rate=16000)
    asr_inputs = asr_processor(
        audio, sampling_rate=16000, return_tensors="pt", language=language_code
    )

    # Move tensors to CUDA. Only floating-point tensors are cast to the model
    # dtype: casting integer tensors (ids, masks) to float16 would corrupt them.
    device_inputs = {}
    for key, value in asr_inputs.items():
        if not torch.is_tensor(value):
            device_inputs[key] = value
        elif torch.is_floating_point(value):
            device_inputs[key] = value.to("cuda", dtype=asr_model.dtype)
        else:
            device_inputs[key] = value.to("cuda")

    with torch.no_grad():
        asr_outputs = asr_model.generate(**device_inputs, max_new_tokens=256)

    decoded = asr_processor.decode(asr_outputs, skip_special_tokens=True)
    # Decoding a batched output may yield a list of strings depending on the
    # installed transformers version; collapse it so the prompt and the export
    # never contain a Python list repr.
    if isinstance(decoded, (list, tuple)):
        decoded = " ".join(str(part) for part in decoded)
    return str(decoded).strip()


def _generate_raw_spec(workflow_type, transcript, extra_instructions):
    """Ask the LLM for the JSON specification and return its decoded reply."""
    system_prompt = (
        "You are an authoritative backend systems architect. Analyze the provided context "
        "and output a strict JSON object with exactly three keys: 'summary' (string), 'tasks' (list of lists "
        "where each item is [Task Name, Priority Low/Medium/High, Status Context]), and 'code' (clean markdown script or schema block).\n"
        "Do not include conversational filler. Return ONLY valid JSON."
    )
    user_content = f"Workflow Class: {workflow_type}\nContext: {transcript}\nModifiers: {extra_instructions}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]

    inputs = llm_tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to("cuda")

    with torch.no_grad():
        outputs = llm_model.generate(
            **inputs,
            max_new_tokens=1024,
            do_sample=True,
            temperature=0.1,
            top_p=0.95,
        )

    # Drop the prompt tokens so only the newly generated reply is decoded.
    response_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return llm_tokenizer.decode(response_tokens, skip_special_tokens=True)


def _write_spec_file(transcript, summary):
    """Write the Markdown export into a fresh temp directory and return its path.

    A per-call directory keeps the download name stable while preventing
    concurrent requests from overwriting each other's file. UTF-8 is explicit
    because transcripts may contain non-ASCII text.
    """
    export_dir = tempfile.mkdtemp(prefix="fone_")
    output_filename = os.path.join(export_dir, "fone_architecture_spec.md")
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(f"# FONE SPECIFICATION\n\n## Audio Transcript\n{transcript}\n\n## Scope Summary\n{summary}")
    return output_filename


@spaces.GPU(duration=90)
def run_pipeline(audio_path, language_code, workflow_type, extra_instructions):
    """Transcribe the audio, generate a structured spec, and export it.

    Args:
        audio_path: Path of the recorded/uploaded audio file; falsy if absent.
        language_code: Language passed to the ASR processor.
        workflow_type: Workflow label inserted into the LLM prompt.
        extra_instructions: Free-form modifiers inserted into the LLM prompt.

    Returns:
        A 4-tuple ``(summary, tasks, code, file_path)``. On any failure the
        first element carries the error message, ``tasks`` is empty and
        ``file_path`` is ``None``.
    """
    if not audio_path:
        return "Error: Empty audio track received.", [], "No execution context.", None

    # Step 1: Native Speech-to-Text Processing
    try:
        transcript = _transcribe(audio_path, language_code)
    except Exception as e:
        # Stop here: forwarding an error message to the LLM as if it were a
        # transcript would produce a specification for a non-existent request.
        return f"[ASR Layer Critical Failure: {str(e)}]", [], "No execution context.", None

    # Step 2: System Architecture Payload Formatting
    try:
        raw_output = _generate_raw_spec(workflow_type, transcript, extra_instructions)

        # Breakdown raw text arrays into programmatic components
        parsed_data = clean_and_parse_json(raw_output)
        summary = _as_text(parsed_data.get("summary"), "No summary processed.")
        tasks = _normalize_tasks(parsed_data.get("tasks", []))
        code_block = _as_text(parsed_data.get("code"), "# No artifacts compiled.")

        # Write asset data out to local disk space for immediate client download
        output_filename = _write_spec_file(transcript, summary)

        return summary, tasks, code_block, output_filename
    except Exception as e:
        return f"LLM Generation Failure: {str(e)}", [], f"```python\n# Execution Trace\n{str(e)}\n```", None


# -------------------------------------------------------------------------
# INTERFACE ORCHESTRATION (GRADIO 6 BLOCK COMPLIANT)
# -------------------------------------------------------------------------
with gr.Blocks(title="fone // Sovereign Workspace") as demo:
    gr.Markdown("## 🎛️ fone // Voice Architecture Pipeline")
    gr.Markdown("*Decentralized hardware orchestration running on native containerized ZeroGPU frames.*")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Input Core")
            audio_feed = gr.Audio(type="filepath", label="Voice Master Input")

            with gr.Row():
                lang_selector = gr.Dropdown(
                    choices=["en", "fr", "es", "de", "ar", "ja"],
                    value="en",
                    label="Input Language",
                )
                workflow_selector = gr.Dropdown(
                    choices=[
                        "Feature Engineering Specification",
                        "Database Schema Map",
                        "Automated System Scripts",
                    ],
                    value="Feature Engineering Specification",
                    label="Routing Class",
                )

            instruction_overlay = gr.Textbox(
                label="Execution Modifiers",
                placeholder="e.g., Target strict Tailwind configurations...",
            )
            trigger_btn = gr.Button("Execute Pipeline Trace", variant="primary")
            file_download = gr.File(label="Exported System Artifacts")

        with gr.Column(scale=2):
            gr.Markdown("### 📤 Orchestration Hub")
            with gr.Tabs():
                with gr.TabItem("System Summary"):
                    summary_display = gr.Textbox(label="Extracted Scope", lines=8, interactive=False)
                with gr.TabItem("Task Allocation Matrix"):
                    task_matrix = gr.Dataframe(
                        headers=["Objective / Component", "Priority Rank", "Status Context"],
                        datatype=["str", "str", "str"],
                        row_count=5,
                        column_count=(3, "fixed"),  # Patched col_count deprecation for Gradio 6
                    )
                with gr.TabItem("Code & Schema Artifacts"):
                    code_display = gr.Code(language="markdown", label="Isolated Scripts", lines=15)

    trigger_btn.click(
        fn=run_pipeline,
        inputs=[audio_feed, lang_selector, workflow_selector, instruction_overlay],
        outputs=[summary_display, task_matrix, code_display, file_download],
    )

if __name__ == "__main__":
    demo.launch(theme=gr.themes.Monochrome())