# fone / app.py
# nathfavour's picture
# Update app.py
# 6f9f10b verified
# Raw
# History Blame Contribute Delete
# 7.8 kB
import json
import os
import re
import tempfile

import gradio as gr
import spaces
import torch
from transformers import AutoProcessor, CohereAsrForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
from transformers.audio_utils import load_audio
# -------------------------------------------------------------------------
# NATIVE ZERO-GPU MODEL INITIALIZATION
# -------------------------------------------------------------------------
# Models are initialized at the module level using .to("cuda") so ZeroGPU can handle fast-state caching.
# Checkpoint identifiers for the speech-to-text model and the instruction LLM.
asr_id = "CohereLabs/cohere-transcribe-03-2026"
llm_id = "CohereLabs/tiny-aya-earth"

# Speech recognition: the processor prepares audio features and decodes token
# ids; the model is loaded in half precision and moved to CUDA at import time.
print("--> Initializing Cohere Transcribe (2B Parameter Layer)...")
asr_processor = AutoProcessor.from_pretrained(asr_id)
asr_model = CohereAsrForConditionalGeneration.from_pretrained(
    asr_id,
    torch_dtype=torch.float16,
).to("cuda")

# Text generation: tokenizer plus causal LM, also half precision on CUDA.
print("--> Initializing Tiny Aya Earth (3.35B Parameter Layer)...")
llm_tokenizer = AutoTokenizer.from_pretrained(llm_id)
llm_model = AutoModelForCausalLM.from_pretrained(
    llm_id,
    torch_dtype=torch.float16,
).to("cuda")
# -------------------------------------------------------------------------
# TEXT PROCESSING & REGEX CLEANING
# -------------------------------------------------------------------------
def clean_and_parse_json(raw_text):
    """Extract a JSON object from raw LLM output.

    ``<think>...</think>`` spans are removed first. The remaining text is then
    tried, in order, as: the whole text, the body of a ```` ```json ```` fence,
    the body of any other fence, and finally the outermost ``{...}`` span.
    The first candidate that parses to a ``dict`` wins.

    Args:
        raw_text: The decoded model response.

    Returns:
        The parsed ``dict``. If nothing parses to a dict (or ``raw_text`` is
        not a string), a fallback dict with ``summary``, ``tasks`` and ``code``
        keys is returned, where ``code`` carries ``raw_text`` unchanged.
    """
    fallback = {
        "summary": "Direct extraction parsing failed. Review raw output block in Code tab.",
        "tasks": [["Review raw compilation logs", "High", "Action Required"]],
        "code": raw_text
    }
    if not isinstance(raw_text, str):
        return fallback

    text = re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL).strip()

    # The whole text goes first so that valid JSON whose string values happen
    # to contain a Markdown fence is not sliced apart by the fence patterns.
    candidates = [text]
    for pattern in (r"```json\s*(.*?)\s*```", r"```[\w+-]*\s*(.*?)\s*```"):
        fenced = re.search(pattern, text, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1))

    # Last resort: JSON surrounded by conversational filler.
    first_brace, last_brace = text.find("{"), text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(text[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate.strip())
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        # Callers use .get(), so a bare list/str/number is not acceptable.
        if isinstance(parsed, dict):
            return parsed
    return fallback
# -------------------------------------------------------------------------
# PIPELINE ORCHESTRATION (RUNS EXCLUSIVELY ON ZERO-GPU RESOURCING)
# -------------------------------------------------------------------------
def _move_asr_input(value):
    """Move one processor output to CUDA, casting only floating-point tensors."""
    if not torch.is_tensor(value):
        return value
    if torch.is_floating_point(value):
        return value.to("cuda", dtype=asr_model.dtype)
    # Integer tensors must keep their dtype; casting them to the model's float
    # dtype would corrupt any ids/masks the processor returns.
    return value.to("cuda")


def _coerce_task_rows(tasks):
    """Normalize LLM-supplied tasks into rows of exactly three strings."""
    if not isinstance(tasks, list):
        return []
    rows = []
    for item in tasks:
        if isinstance(item, dict):
            cells = list(item.values())
        elif isinstance(item, (list, tuple)):
            cells = list(item)
        else:
            cells = [item]
        cells = [str(cell) for cell in cells[:3]]
        rows.append(cells + [""] * (3 - len(cells)))
    return rows


def _write_spec_file(transcript, summary):
    """Write the Markdown spec into a fresh temp directory and return its path."""
    # One directory per request keeps concurrent users from overwriting each
    # other's export while preserving the download filename.
    export_dir = tempfile.mkdtemp(prefix="fone_")
    output_path = os.path.join(export_dir, "fone_architecture_spec.md")
    # Explicit UTF-8: transcripts may be in non-Latin scripts.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(f"# FONE SPECIFICATION\n\n## Audio Transcript\n{transcript}\n\n## Scope Summary\n{summary}")
    return output_path


@spaces.GPU(duration=90)
def run_pipeline(audio_path, language_code, workflow_type, extra_instructions):
    """Transcribe an audio file, ask the LLM for a structured spec, and export it.

    Args:
        audio_path: Filesystem path of the recorded/uploaded audio; falsy
            values short-circuit with an error tuple.
        language_code: Language code forwarded to the ASR processor.
        workflow_type: Label inserted into the LLM prompt as the workflow class.
        extra_instructions: Free-text modifiers appended to the LLM prompt.

    Returns:
        A 4-tuple ``(summary, tasks, code_block, file_path)``. ``tasks`` is a
        list of three-string rows. ``file_path`` is the exported Markdown file,
        or ``None`` when there was no audio, generation failed, or the export
        could not be written.
    """
    if not audio_path:
        return "Error: Empty audio track received.", [], "No execution context.", None

    # Step 1: Native Speech-to-Text Processing
    try:
        audio = load_audio(audio_path, sampling_rate=16000)
        asr_inputs = asr_processor(audio, sampling_rate=16000, return_tensors="pt", language=language_code)
        asr_inputs = {k: _move_asr_input(v) for k, v in asr_inputs.items()}
        with torch.no_grad():
            asr_outputs = asr_model.generate(**asr_inputs, max_new_tokens=256)
        transcript = asr_processor.decode(asr_outputs, skip_special_tokens=True)
        # NOTE(review): generate() output is batched; if decode() hands back one
        # string per sequence, flatten so the prompt/export get plain text.
        if isinstance(transcript, (list, tuple)):
            transcript = " ".join(str(part) for part in transcript)
    except Exception as e:
        # Best effort: the failure text stands in for the transcript so the
        # remaining steps still run and the export records what went wrong.
        transcript = f"[ASR Layer Critical Failure: {str(e)}]"

    # Step 2: System Architecture Payload Formatting
    system_prompt = (
        "You are an authoritative backend systems architect. Analyze the provided context "
        "and output a strict JSON object with exactly three keys: 'summary' (string), 'tasks' (list of lists "
        "where each item is [Task Name, Priority Low/Medium/High, Status Context]), and 'code' (clean markdown script or schema block).\n"
        "Do not include conversational filler. Return ONLY valid JSON."
    )
    user_content = f"Workflow Class: {workflow_type}\nContext: {transcript}\nModifiers: {extra_instructions}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content}
    ]

    try:
        inputs = llm_tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt"
        ).to("cuda")
        with torch.no_grad():
            outputs = llm_model.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=True,
                temperature=0.1,
                top_p=0.95
            )
        # Drop the prompt tokens so only the newly generated reply is decoded.
        response_tokens = outputs[0][inputs['input_ids'].shape[-1]:]
        raw_output = llm_tokenizer.decode(response_tokens, skip_special_tokens=True)
    except Exception as e:
        return f"LLM Generation Failure: {str(e)}", [], f"```python\n# Execution Trace\n{str(e)}\n```", None

    # Break the raw text down into the three UI payloads, tolerating a model
    # that returned the wrong JSON shape for any of them.
    parsed_data = clean_and_parse_json(raw_output)
    if not isinstance(parsed_data, dict):
        parsed_data = {"code": raw_output}
    summary = str(parsed_data.get("summary", "No summary processed."))
    tasks = _coerce_task_rows(parsed_data.get("tasks", []))
    code_block = parsed_data.get("code", "# No artifacts compiled.")
    if not isinstance(code_block, str):
        code_block = json.dumps(code_block, ensure_ascii=False, indent=2, default=str)

    # An export failure must not discard a successful generation.
    try:
        output_filename = _write_spec_file(transcript, summary)
    except OSError:
        output_filename = None
    return summary, tasks, code_block, output_filename
# -------------------------------------------------------------------------
# INTERFACE ORCHESTRATION (GRADIO 6 BLOCK COMPLIANT)
# -------------------------------------------------------------------------
# Choice lists and table headers are hoisted so the layout below reads cleanly.
_LANGUAGE_CODES = ["en", "fr", "es", "de", "ar", "ja"]
_WORKFLOW_CLASSES = [
    "Feature Engineering Specification",
    "Database Schema Map",
    "Automated System Scripts",
]
_TASK_HEADERS = ["Objective / Component", "Priority Rank", "Status Context"]

with gr.Blocks(title="fone // Sovereign Workspace") as demo:
    gr.Markdown("## 🎛️ fone // Voice Architecture Pipeline")
    gr.Markdown("*Decentralized hardware orchestration running on native containerized ZeroGPU frames.*")

    with gr.Row():
        # Left column: audio capture, routing options, trigger and download.
        with gr.Column(scale=1):
            gr.Markdown("### 📥 Input Core")
            audio_feed = gr.Audio(type="filepath", label="Voice Master Input")
            with gr.Row():
                lang_selector = gr.Dropdown(
                    choices=_LANGUAGE_CODES,
                    value="en",
                    label="Input Language",
                )
                workflow_selector = gr.Dropdown(
                    choices=_WORKFLOW_CLASSES,
                    value=_WORKFLOW_CLASSES[0],
                    label="Routing Class",
                )
            instruction_overlay = gr.Textbox(
                label="Execution Modifiers",
                placeholder="e.g., Target strict Tailwind configurations...",
            )
            trigger_btn = gr.Button("Execute Pipeline Trace", variant="primary")
            file_download = gr.File(label="Exported System Artifacts")

        # Right column: one tab per pipeline output.
        with gr.Column(scale=2):
            gr.Markdown("### 📤 Orchestration Hub")
            with gr.Tabs():
                with gr.TabItem("System Summary"):
                    summary_display = gr.Textbox(label="Extracted Scope", lines=8, interactive=False)
                with gr.TabItem("Task Allocation Matrix"):
                    task_matrix = gr.Dataframe(
                        headers=_TASK_HEADERS,
                        datatype=["str"] * len(_TASK_HEADERS),
                        row_count=5,
                        # column_count is used in place of the deprecated col_count (Gradio 6).
                        column_count=(3, "fixed"),
                    )
                with gr.TabItem("Code & Schema Artifacts"):
                    code_display = gr.Code(language="markdown", label="Isolated Scripts", lines=15)

    # Output order matches the 4-tuple returned by run_pipeline.
    trigger_btn.click(
        run_pipeline,
        inputs=[audio_feed, lang_selector, workflow_selector, instruction_overlay],
        outputs=[summary_display, task_matrix, code_display, file_download],
    )

if __name__ == "__main__":
    monochrome_theme = gr.themes.Monochrome()
    demo.launch(theme=monochrome_theme)