Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import os | |
| import re | |
| import json | |
| import spaces | |
| from transformers import AutoProcessor, CohereAsrForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer | |
| from transformers.audio_utils import load_audio | |
| # ------------------------------------------------------------------------- | |
| # NATIVE ZERO-GPU MODEL INITIALIZATION | |
| # ------------------------------------------------------------------------- | |
| # Models are initialized at the module level using .to("cuda") so ZeroGPU can handle fast-state caching. | |
# Checkpoint identifiers for the two pipeline stages (speech-to-text, then text generation).
asr_id = "CohereLabs/cohere-transcribe-03-2026"
llm_id = "CohereLabs/tiny-aya-earth"

# Stage 1: speech recognition. Loaded once at import time in half precision and
# placed on CUDA immediately after loading.
print("--> Initializing Cohere Transcribe (2B Parameter Layer)...")
asr_processor = AutoProcessor.from_pretrained(asr_id)
asr_model = CohereAsrForConditionalGeneration.from_pretrained(
    asr_id,
    torch_dtype=torch.float16,
).to("cuda")

# Stage 2: the language model that turns the transcript into a structured spec.
# Same loading recipe as the ASR model: float16 weights, moved to CUDA at import.
print("--> Initializing Tiny Aya Earth (3.35B Parameter Layer)...")
llm_tokenizer = AutoTokenizer.from_pretrained(llm_id)
llm_model = AutoModelForCausalLM.from_pretrained(
    llm_id,
    torch_dtype=torch.float16,
).to("cuda")
| # ------------------------------------------------------------------------- | |
| # TEXT PROCESSING & REGEX CLEANING | |
| # ------------------------------------------------------------------------- | |
def clean_and_parse_json(raw_text):
    """
    Extract the JSON object from raw LLM output.

    The model is asked for bare JSON but may wrap it in a reasoning trace,
    a markdown fence, or conversational filler. Several candidate spans are
    tried in order and the first one that parses to a JSON *object* wins:

    1. the whole text (after removing ``<think>...</think>`` traces);
    2. the content of the first fenced block, closed at the nearest fence;
    3. the content from the first fence to the *last* fence, which survives
       a payload whose string values themselves contain markdown fences;
    4. the span from the first ``{`` to the last ``}``.

    Args:
        raw_text: Decoded model output.

    Returns:
        The parsed dict. If nothing parses to a dict (including valid JSON
        that is a list or scalar, or a non-string input), a placeholder dict
        with the keys 'summary', 'tasks' and 'code' is returned, where 'code'
        carries ``raw_text`` unchanged so it can still be inspected.
    """
    fallback = {
        "summary": "Direct extraction parsing failed. Review raw output block in Code tab.",
        "tasks": [["Review raw compilation logs", "High", "Action Required"]],
        "code": raw_text,
    }
    if not isinstance(raw_text, str):
        return fallback

    cleaned = re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL)

    candidates = [cleaned]
    fence_patterns = (
        r"```(?:json)?\s*(.*?)\s*```",  # nearest closing fence
        r"```(?:json)?\s*(.*)```",      # last closing fence (nested fences in values)
    )
    for pattern in fence_patterns:
        fenced = re.search(pattern, cleaned, flags=re.DOTALL | re.IGNORECASE)
        if fenced:
            candidates.append(fenced.group(1))

    first_brace = cleaned.find("{")
    last_brace = cleaned.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(cleaned[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate.strip())
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; try the next candidate span.
            continue
        # Callers use dict.get(), so lists and scalars count as a parse failure.
        if isinstance(parsed, dict):
            return parsed
    return fallback
| # ------------------------------------------------------------------------- | |
| # PIPELINE ORCHESTRATION (RUNS EXCLUSIVELY ON ZERO-GPU RESOURCING) | |
| # ------------------------------------------------------------------------- | |
def _normalize_tasks(tasks):
    """Coerce the model's 'tasks' value into rows of exactly three strings."""
    if not isinstance(tasks, (list, tuple)):
        return []
    rows = []
    for item in tasks:
        if isinstance(item, dict):
            cells = list(item.values())
        elif isinstance(item, (list, tuple)):
            cells = list(item)
        else:
            cells = [item]
        cells = [str(cell) for cell in cells[:3]]
        cells += [""] * (3 - len(cells))
        rows.append(cells)
    return rows


# ZeroGPU only attaches a GPU while a @spaces.GPU-decorated function is running;
# without the decorator the CUDA calls below have no device to run on.
# Pass duration=<seconds> to the decorator if generation needs a longer slot.
@spaces.GPU
def run_pipeline(audio_path, language_code, workflow_type, extra_instructions):
    """
    Transcribe an audio file, then ask the LLM for a structured specification.

    Args:
        audio_path: Filesystem path of the recorded/uploaded audio; falsy when
            nothing was provided.
        language_code: Language passed to the ASR processor.
        workflow_type: Label interpolated into the user prompt as the workflow class.
        extra_instructions: Free-text modifiers interpolated into the user prompt.

    Returns:
        A 4-tuple ``(summary, tasks, code_block, file_path)`` matching the four
        UI outputs. ``file_path`` is the exported markdown file, or ``None``
        when no audio was supplied or the LLM stage failed.
    """
    if not audio_path:
        return "Error: Empty audio track received.", [], "No execution context.", None

    # Step 1: Native Speech-to-Text Processing
    try:
        audio = load_audio(audio_path, sampling_rate=16000)
        asr_inputs = asr_processor(audio, sampling_rate=16000, return_tensors="pt", language=language_code)
        # Move every tensor to CUDA, but cast only floating-point tensors to the
        # model dtype: integer tensors (ids, masks) must keep their dtype.
        device_inputs = {}
        for key, value in asr_inputs.items():
            if torch.is_tensor(value):
                if value.is_floating_point():
                    value = value.to("cuda", dtype=asr_model.dtype)
                else:
                    value = value.to("cuda")
            device_inputs[key] = value
        with torch.no_grad():
            asr_outputs = asr_model.generate(**device_inputs, max_new_tokens=256)
        decoded = asr_processor.decode(asr_outputs, skip_special_tokens=True)
        # NOTE(review): decode may return a list of strings for batched ids
        # (depends on the installed transformers version); flatten it so the
        # prompt and the export never contain a Python list repr.
        if isinstance(decoded, (list, tuple)):
            decoded = " ".join(str(part) for part in decoded)
        transcript = str(decoded)
    except Exception as e:
        # Best-effort: the failure text is forwarded as the context so the run
        # still produces a visible result instead of aborting here.
        transcript = f"[ASR Layer Critical Failure: {str(e)}]"

    # Step 2: System Architecture Payload Formatting
    system_prompt = (
        "You are an authoritative backend systems architect. Analyze the provided context "
        "and output a strict JSON object with exactly three keys: 'summary' (string), 'tasks' (list of lists "
        "where each item is [Task Name, Priority Low/Medium/High, Status Context]), and 'code' (clean markdown script or schema block).\n"
        "Do not include conversational filler. Return ONLY valid JSON."
    )
    user_content = f"Workflow Class: {workflow_type}\nContext: {transcript}\nModifiers: {extra_instructions}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]

    try:
        inputs = llm_tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to("cuda")
        with torch.no_grad():
            outputs = llm_model.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.1,
                top_p=0.95,
            )
        # generate() returns prompt + continuation; keep only the new tokens.
        response_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        raw_output = llm_tokenizer.decode(response_tokens, skip_special_tokens=True)

        # Break the raw text down into the three UI payloads, coercing each to
        # the type its output component expects.
        parsed_data = clean_and_parse_json(raw_output)
        summary = parsed_data.get("summary", "No summary processed.")
        if not isinstance(summary, str):
            summary = json.dumps(summary, ensure_ascii=False, indent=2)
        tasks = _normalize_tasks(parsed_data.get("tasks", []))
        code_block = parsed_data.get("code", "# No artifacts compiled.")
        if not isinstance(code_block, str):
            code_block = json.dumps(code_block, ensure_ascii=False, indent=2)

        # Write the export into a per-request temp directory so concurrent users
        # cannot overwrite each other's download; the visible filename is unchanged.
        import tempfile

        export_dir = tempfile.mkdtemp(prefix="fone_")
        output_filename = os.path.join(export_dir, "fone_architecture_spec.md")
        # Explicit UTF-8: transcripts may contain non-ASCII text (e.g. ar / ja).
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(f"# FONE SPECIFICATION\n\n## Audio Transcript\n{transcript}\n\n## Scope Summary\n{summary}")
        return summary, tasks, code_block, output_filename
    except Exception as e:
        # UI boundary: surface the failure in the outputs rather than raising.
        return f"LLM Generation Failure: {str(e)}", [], f"```python\n# Execution Trace\n{str(e)}\n```", None
| # ------------------------------------------------------------------------- | |
| # INTERFACE ORCHESTRATION (GRADIO 6 BLOCK COMPLIANT) | |
| # ------------------------------------------------------------------------- | |
# Option lists for the two dropdowns, kept next to the layout that uses them.
LANGUAGE_CHOICES = ["en", "fr", "es", "de", "ar", "ja"]
WORKFLOW_CHOICES = [
    "Feature Engineering Specification",
    "Database Schema Map",
    "Automated System Scripts",
]
TASK_HEADERS = ["Objective / Component", "Priority Rank", "Status Context"]

with gr.Blocks(title="fone // Sovereign Workspace") as demo:
    gr.Markdown("## 🎛️ fone // Voice Architecture Pipeline")
    gr.Markdown("*Decentralized hardware orchestration running on native containerized ZeroGPU frames.*")

    with gr.Row():
        # Left column: everything the user supplies, plus the download slot.
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Input Core")
            audio_feed = gr.Audio(type="filepath", label="Voice Master Input")
            with gr.Row():
                lang_selector = gr.Dropdown(
                    choices=LANGUAGE_CHOICES,
                    value="en",
                    label="Input Language",
                )
                workflow_selector = gr.Dropdown(
                    choices=WORKFLOW_CHOICES,
                    value="Feature Engineering Specification",
                    label="Routing Class",
                )
            instruction_overlay = gr.Textbox(
                label="Execution Modifiers",
                placeholder="e.g., Target strict Tailwind configurations...",
            )
            trigger_btn = gr.Button("Execute Pipeline Trace", variant="primary")
            file_download = gr.File(label="Exported System Artifacts")

        # Right column: one tab per element of the pipeline's result tuple.
        with gr.Column(scale=2):
            gr.Markdown("### 📤 Orchestration Hub")
            with gr.Tabs():
                with gr.TabItem("System Summary"):
                    summary_display = gr.Textbox(label="Extracted Scope", lines=8, interactive=False)
                with gr.TabItem("Task Allocation Matrix"):
                    task_matrix = gr.Dataframe(
                        headers=TASK_HEADERS,
                        datatype=["str", "str", "str"],
                        row_count=5,
                        column_count=(3, "fixed"),  # Gradio 6 name for the former col_count
                    )
                with gr.TabItem("Code & Schema Artifacts"):
                    code_display = gr.Code(language="markdown", label="Isolated Scripts", lines=15)

    # Inputs follow run_pipeline's parameter order; outputs follow the order of
    # the tuple it returns.
    pipeline_inputs = [audio_feed, lang_selector, workflow_selector, instruction_overlay]
    pipeline_outputs = [summary_display, task_matrix, code_display, file_download]
    trigger_btn.click(
        fn=run_pipeline,
        inputs=pipeline_inputs,
        outputs=pipeline_outputs,
    )

if __name__ == "__main__":
    # The theme is supplied at launch time rather than on gr.Blocks.
    demo.launch(theme=gr.themes.Monochrome())