File size: 7,797 Bytes
479c914
6f9f10b
7c0949d
 
 
6f9f10b
 
 
479c914
7c0949d
6f9f10b
7c0949d
6f9f10b
 
 
 
 
 
479c914
6f9f10b
 
 
 
 
7c0949d
 
6f9f10b
7c0949d
 
 
6f9f10b
7c0949d
 
 
 
 
 
 
 
 
6f9f10b
 
 
7c0949d
 
6f9f10b
 
 
 
 
 
 
7c0949d
6f9f10b
 
 
 
 
 
 
 
 
 
 
 
 
 
7c0949d
6f9f10b
 
 
 
7c0949d
6f9f10b
7c0949d
 
 
 
 
 
 
6f9f10b
 
 
 
 
 
 
7c0949d
6f9f10b
 
 
 
 
 
 
 
 
 
 
7c0949d
6f9f10b
 
 
 
 
7c0949d
6f9f10b
 
7c0949d
6f9f10b
7c0949d
 
 
6f9f10b
7c0949d
 
 
 
 
 
6f9f10b
7c0949d
 
 
 
6f9f10b
7c0949d
 
6f9f10b
7c0949d
 
 
 
 
 
6f9f10b
7c0949d
 
 
 
 
 
 
 
 
 
 
 
 
6f9f10b
7c0949d
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import json
import os
import re
import tempfile

import gradio as gr
import spaces
import torch
from transformers import AutoProcessor, CohereAsrForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
from transformers.audio_utils import load_audio

# -------------------------------------------------------------------------
# NATIVE ZERO-GPU MODEL INITIALIZATION
# -------------------------------------------------------------------------
# Both checkpoints are loaded once, at import time, in float16 and placed on
# "cuda" right here at module level (the original author's note: this is what
# lets ZeroGPU handle fast-state caching).

# Speech-to-text stage: processor + generation model share one checkpoint id.
print("--> Initializing Cohere Transcribe (2B Parameter Layer)...")
asr_id = "CohereLabs/cohere-transcribe-03-2026"
asr_processor = AutoProcessor.from_pretrained(asr_id)
asr_model = CohereAsrForConditionalGeneration.from_pretrained(
    asr_id,
    torch_dtype=torch.float16,
).to("cuda")

# Text stage: tokenizer + causal LM that turns the transcript into a spec.
print("--> Initializing Tiny Aya Earth (3.35B Parameter Layer)...")
llm_id = "CohereLabs/tiny-aya-earth"
llm_tokenizer = AutoTokenizer.from_pretrained(llm_id)
llm_model = AutoModelForCausalLM.from_pretrained(
    llm_id,
    torch_dtype=torch.float16,
).to("cuda")

# -------------------------------------------------------------------------
# TEXT PROCESSING & REGEX CLEANING
# -------------------------------------------------------------------------
def clean_and_parse_json(raw_text):
    """
    Extract the JSON object from raw LLM output.

    ``<think>...</think>`` spans are removed first. The remaining text is then
    tried, in order, as:

    1. the whole text (covers a bare JSON object, including one whose string
       values contain markdown fences),
    2. the payload of the first ```json fenced block,
    3. the substring from the first ``{`` to the last ``}`` (covers prose
       around the object and fenced payloads that themselves contain fences).

    The first candidate that decodes to a JSON *object* is returned.

    Args:
        raw_text: Decoded model output. Anything that is not a ``str`` is
            treated as unparseable.

    Returns:
        dict: The decoded object, or a fallback dict with the keys
        ``summary``, ``tasks`` and ``code`` (``code`` holds ``raw_text``
        unchanged) when no candidate yields a JSON object. Never raises for
        malformed input.
    """
    fallback = {
        "summary": "Direct extraction parsing failed. Review raw output block in Code tab.",
        "tasks": [["Review raw compilation logs", "High", "Action Required"]],
        "code": raw_text,
    }
    if not isinstance(raw_text, str):
        return fallback

    cleaned = re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL).strip()

    candidates = [cleaned]
    json_match = re.search(r"```json\s*(.*?)\s*```", cleaned, re.DOTALL)
    if json_match:
        candidates.append(json_match.group(1))
    first_brace = cleaned.find("{")
    last_brace = cleaned.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(cleaned[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # Not valid JSON (or pathologically nested); try the next shape.
            continue
        # Callers use dict.get on the result, so a JSON list/string/number
        # is not an acceptable payload even though it decodes cleanly.
        if isinstance(parsed, dict):
            return parsed
    return fallback

# -------------------------------------------------------------------------
# PIPELINE ORCHESTRATION (RUNS EXCLUSIVELY ON ZERO-GPU RESOURCING)
# -------------------------------------------------------------------------
def _to_device(batch, device, float_dtype):
    """Copy *batch* into a dict with tensors on *device*, casting only floating-point tensors."""
    moved = {}
    for key, value in batch.items():
        if not torch.is_tensor(value):
            moved[key] = value
        elif torch.is_floating_point(value):
            moved[key] = value.to(device, dtype=float_dtype)
        else:
            # Integer/bool tensors (e.g. ids or masks) must keep their dtype.
            moved[key] = value.to(device)
    return moved


def _normalize_task_rows(tasks, width=3):
    """Coerce the model's 'tasks' value into rows of exactly *width* strings."""
    if not isinstance(tasks, (list, tuple)):
        return []
    rows = []
    for item in tasks:
        if isinstance(item, dict):
            cells = list(item.values())
        elif isinstance(item, (list, tuple)):
            cells = list(item)
        else:
            cells = [item]
        cells = ["" if cell is None else str(cell) for cell in cells[:width]]
        cells.extend([""] * (width - len(cells)))
        rows.append(cells)
    return rows


@spaces.GPU(duration=90)
def run_pipeline(audio_path, language_code, workflow_type, extra_instructions):
    """
    Transcribe an audio file, ask the LLM for a structured spec, and export it.

    Args:
        audio_path: Filesystem path of the recording; falsy means no audio.
        language_code: Language hint forwarded to the ASR processor.
        workflow_type: Label inserted into the prompt as the workflow class.
        extra_instructions: Free-text modifiers appended to the prompt.

    Returns:
        tuple: ``(summary, tasks, code_block, file_path)`` where ``tasks`` is a
        list of 3-string rows and ``file_path`` is the exported markdown file,
        or ``None`` when nothing was written. Failures are reported through
        the ``summary`` slot rather than raised.
    """
    if not audio_path:
        return "Error: Empty audio track received.", [], "No execution context.", None

    # Step 1: Native Speech-to-Text Processing
    try:
        audio = load_audio(audio_path, sampling_rate=16000)
        asr_inputs = asr_processor(audio, sampling_rate=16000, return_tensors="pt", language=language_code)
        # Casting every tensor to the model dtype would also turn any integer
        # tensors into float16, so only floating-point features are cast.
        asr_inputs = _to_device(asr_inputs, "cuda", asr_model.dtype)

        with torch.no_grad():
            asr_outputs = asr_model.generate(**asr_inputs, max_new_tokens=256)
        decoded = asr_processor.decode(asr_outputs, skip_special_tokens=True)
    except Exception as e:
        # Stop here: sending the error text to the LLM as if it were a
        # transcript only produces a fabricated spec and spends GPU time.
        return f"[ASR Layer Critical Failure: {str(e)}]", [], "No execution context.", None

    # NOTE(review): decode() on the batched generate() output may return one
    # string per batch item depending on the transformers version - confirm.
    if isinstance(decoded, (list, tuple)):
        decoded = " ".join(str(part) for part in decoded)
    transcript = str(decoded).strip()

    # Step 2: System Architecture Payload Formatting
    system_prompt = (
        "You are an authoritative backend systems architect. Analyze the provided context "
        "and output a strict JSON object with exactly three keys: 'summary' (string), 'tasks' (list of lists "
        "where each item is [Task Name, Priority Low/Medium/High, Status Context]), and 'code' (clean markdown script or schema block).\n"
        "Do not include conversational filler. Return ONLY valid JSON."
    )
    user_content = f"Workflow Class: {workflow_type}\nContext: {transcript}\nModifiers: {extra_instructions}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]

    try:
        inputs = llm_tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to("cuda")

        with torch.no_grad():
            outputs = llm_model.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.1,
                top_p=0.95,
            )

        # Keep only the newly generated tokens, not the echoed prompt.
        response_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        raw_output = llm_tokenizer.decode(response_tokens, skip_special_tokens=True)
    except Exception as e:
        return f"LLM Generation Failure: {str(e)}", [], f"```python\n# Execution Trace\n{str(e)}\n```", None

    # Break the raw text down into the three UI payloads.
    parsed_data = clean_and_parse_json(raw_output)
    if not isinstance(parsed_data, dict):
        # Valid JSON that is not an object: surface it as the code artifact.
        parsed_data = {"code": raw_output}

    summary = parsed_data.get("summary", "No summary processed.")
    if not isinstance(summary, str):
        summary = str(summary)
    tasks = _normalize_task_rows(parsed_data.get("tasks", []))
    code_block = parsed_data.get("code", "# No artifacts compiled.")
    if not isinstance(code_block, str):
        # The model sometimes returns a nested object for a schema.
        code_block = json.dumps(code_block, indent=2, ensure_ascii=False)

    # Write the export into a per-request directory so concurrent requests do
    # not overwrite each other, keeping the download name stable. The
    # directory is not removed here; cleanup is left to the host.
    try:
        export_dir = tempfile.mkdtemp(prefix="fone_")
        output_filename = os.path.join(export_dir, "fone_architecture_spec.md")
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(f"# FONE SPECIFICATION\n\n## Audio Transcript\n{transcript}\n\n## Scope Summary\n{summary}")
    except OSError:
        # A failed export should not discard results that were generated.
        output_filename = None

    return summary, tasks, code_block, output_filename

# -------------------------------------------------------------------------
# INTERFACE ORCHESTRATION (GRADIO 6 BLOCK COMPLIANT)
# -------------------------------------------------------------------------
# Option lists and table headers shown by the widgets below.
_LANGUAGE_CHOICES = ["en", "fr", "es", "de", "ar", "ja"]
_WORKFLOW_CHOICES = [
    "Feature Engineering Specification",
    "Database Schema Map",
    "Automated System Scripts",
]
_TASK_HEADERS = ["Objective / Component", "Priority Rank", "Status Context"]

with gr.Blocks(title="fone // Sovereign Workspace") as demo:
    gr.Markdown("## 🎛️ fone // Voice Architecture Pipeline")
    gr.Markdown("*Decentralized hardware orchestration running on native containerized ZeroGPU frames.*")

    with gr.Row():
        # Left column: everything the user supplies, plus the download slot.
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Input Core")
            audio_feed = gr.Audio(
                type="filepath",
                label="Voice Master Input",
            )

            with gr.Row():
                lang_selector = gr.Dropdown(
                    choices=_LANGUAGE_CHOICES,
                    value=_LANGUAGE_CHOICES[0],
                    label="Input Language",
                )
                workflow_selector = gr.Dropdown(
                    choices=_WORKFLOW_CHOICES,
                    value=_WORKFLOW_CHOICES[0],
                    label="Routing Class",
                )

            instruction_overlay = gr.Textbox(
                label="Execution Modifiers",
                placeholder="e.g., Target strict Tailwind configurations...",
            )
            trigger_btn = gr.Button("Execute Pipeline Trace", variant="primary")
            file_download = gr.File(label="Exported System Artifacts")

        # Right column: one tab per pipeline output.
        with gr.Column(scale=2):
            gr.Markdown("### 📤 Orchestration Hub")
            with gr.Tabs():
                with gr.TabItem("System Summary"):
                    summary_display = gr.Textbox(
                        label="Extracted Scope",
                        lines=8,
                        interactive=False,
                    )
                with gr.TabItem("Task Allocation Matrix"):
                    task_matrix = gr.Dataframe(
                        headers=_TASK_HEADERS,
                        datatype=["str"] * len(_TASK_HEADERS),
                        row_count=5,
                        # column_count replaces the deprecated col_count (Gradio 6).
                        column_count=(len(_TASK_HEADERS), "fixed"),
                    )
                with gr.TabItem("Code & Schema Artifacts"):
                    code_display = gr.Code(
                        language="markdown",
                        label="Isolated Scripts",
                        lines=15,
                    )

    # The button feeds the four inputs to run_pipeline and fans its 4-tuple
    # result out to the summary, table, code and file widgets, in that order.
    pipeline_inputs = [audio_feed, lang_selector, workflow_selector, instruction_overlay]
    pipeline_outputs = [summary_display, task_matrix, code_display, file_download]
    trigger_btn.click(fn=run_pipeline, inputs=pipeline_inputs, outputs=pipeline_outputs)

if __name__ == "__main__":
    # The theme is passed to launch() rather than to gr.Blocks().
    monochrome_theme = gr.themes.Monochrome()
    demo.launch(theme=monochrome_theme)