Spaces:

build-small-hackathon
/

fone

Runtime error

App Files Files Community

fone / app.py

nathfavour

Update app.py

6f9f10b verified 14 days ago

Raw

History Blame Contribute Delete

7.8 kB

	import gradio as gr
	import torch
	import os
	import re
	import json
	import spaces
	from transformers import AutoProcessor, CohereAsrForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
	from transformers.audio_utils import load_audio

	# -------------------------------------------------------------------------
	# NATIVE ZERO-GPU MODEL INITIALIZATION
	# -------------------------------------------------------------------------
	# Models are initialized at the module level using .to("cuda") so ZeroGPU can handle fast-state caching.
	# Speech-to-text stage: processor plus half-precision model, placed on the GPU.
	print("--> Initializing Cohere Transcribe (2B Parameter Layer)...")
	asr_id = "CohereLabs/cohere-transcribe-03-2026"
	asr_processor = AutoProcessor.from_pretrained(asr_id)
	asr_model = CohereAsrForConditionalGeneration.from_pretrained(
	    asr_id,
	    torch_dtype=torch.float16,
	).to("cuda")

	# Text-generation stage: tokenizer plus half-precision causal LM, placed on the GPU.
	print("--> Initializing Tiny Aya Earth (3.35B Parameter Layer)...")
	llm_id = "CohereLabs/tiny-aya-earth"
	llm_tokenizer = AutoTokenizer.from_pretrained(llm_id)
	llm_model = AutoModelForCausalLM.from_pretrained(
	    llm_id,
	    torch_dtype=torch.float16,
	).to("cuda")

	# -------------------------------------------------------------------------
	# TEXT PROCESSING & REGEX CLEANING
	# -------------------------------------------------------------------------
	def clean_and_parse_json(raw_text):
	    """
	    Extract the JSON object from raw LLM output.

	    ``<think>...</think>`` sections are removed first. The remaining text is
	    then tried as JSON in three forms, in order: the whole text, the content
	    of the first fenced code block (with or without a ``json`` tag), and the
	    span from the first ``{`` to the last ``}``. The first form that parses
	    to a JSON object wins.

	    Args:
	        raw_text: Decoded model output.

	    Returns:
	        The parsed dict, or a fallback dict with the keys ``summary``,
	        ``tasks`` and ``code`` (``code`` carries ``raw_text`` unchanged)
	        when nothing parses to an object. Never raises.
	    """
	    fallback = {
	        "summary": "Direct extraction parsing failed. Review raw output block in Code tab.",
	        "tasks": [["Review raw compilation logs", "High", "Action Required"]],
	        "code": raw_text,
	    }
	    if not isinstance(raw_text, str):
	        return fallback

	    cleaned = re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL).strip()

	    # The whole text goes first so that a bare JSON payload whose 'code'
	    # value itself contains ``` fences is not mistaken for a fenced wrapper.
	    candidates = [cleaned]

	    fence = re.search(r"```(?:json)?\s*(.*?)\s*```", cleaned, flags=re.DOTALL)
	    if fence:
	        candidates.append(fence.group(1))

	    # Last resort: the model surrounded the object with prose.
	    start, end = cleaned.find("{"), cleaned.rfind("}")
	    if 0 <= start < end:
	        candidates.append(cleaned[start:end + 1])

	    for candidate in candidates:
	        try:
	            parsed = json.loads(candidate)
	        except ValueError:  # json.JSONDecodeError is a ValueError subclass
	            continue
	        # Callers use .get(), so a list or scalar counts as a parse failure.
	        if isinstance(parsed, dict):
	            return parsed
	    return fallback

	# -------------------------------------------------------------------------
	# PIPELINE ORCHESTRATION (RUNS EXCLUSIVELY ON ZERO-GPU RESOURCING)
	# -------------------------------------------------------------------------
	def _as_text(value):
	    """Return value unchanged if it is a string, otherwise its JSON rendering."""
	    if isinstance(value, str):
	        return value
	    return json.dumps(value, ensure_ascii=False, indent=2)


	def _normalize_tasks(tasks):
	    """Coerce the model's 'tasks' value into rows of exactly three strings."""
	    if not isinstance(tasks, (list, tuple)):
	        return []
	    rows = []
	    for item in tasks:
	        if isinstance(item, dict):
	            cells = list(item.values())
	        elif isinstance(item, (list, tuple)):
	            cells = list(item)
	        else:
	            cells = [item]
	        cells = ["" if cell is None else str(cell) for cell in cells[:3]]
	        cells += [""] * (3 - len(cells))
	        rows.append(cells)
	    return rows


	@spaces.GPU(duration=90)
	def run_pipeline(audio_path, language_code, workflow_type, extra_instructions):
	    """
	    Transcribe an audio file, then ask the LLM for a structured specification.

	    Args:
	        audio_path: Path of the recorded/uploaded audio file; falsy means no audio.
	        language_code: Language code forwarded to the ASR processor.
	        workflow_type: Label inserted into the prompt as "Workflow Class".
	        extra_instructions: Free text inserted into the prompt as "Modifiers".

	    Returns:
	        A 4-tuple ``(summary, tasks, code, file_path)``. ``tasks`` is a list of
	        three-string rows; ``file_path`` is the exported markdown file, or
	        ``None`` when the run failed. Failures are reported through the
	        returned strings rather than raised.
	    """
	    import tempfile

	    if not audio_path:
	        return "Error: Empty audio track received.", [], "No execution context.", None

	    # Step 1: Native Speech-to-Text Processing
	    try:
	        audio = load_audio(audio_path, sampling_rate=16000)
	        asr_inputs = asr_processor(audio, sampling_rate=16000, return_tensors="pt", language=language_code)

	        # Move every tensor to CUDA, but only cast floating-point tensors to the
	        # model dtype: any integer tensors the processor returns (e.g. token ids)
	        # must keep their dtype.
	        moved_inputs = {}
	        for key, value in asr_inputs.items():
	            if torch.is_tensor(value):
	                if value.is_floating_point():
	                    value = value.to("cuda", dtype=asr_model.dtype)
	                else:
	                    value = value.to("cuda")
	            moved_inputs[key] = value

	        with torch.no_grad():
	            asr_outputs = asr_model.generate(**moved_inputs, max_new_tokens=256)
	        transcript = asr_processor.decode(asr_outputs, skip_special_tokens=True)
	        # NOTE(review): decode() may return a list for batched output depending
	        # on the transformers version - confirm; joining is a no-op for a string.
	        if isinstance(transcript, (list, tuple)):
	            transcript = " ".join(str(part) for part in transcript)
	    except Exception as e:
	        # Stop here: passing the error text to the LLM as "context" would yield
	        # a fabricated specification instead of a visible failure.
	        return (
	            f"[ASR Layer Critical Failure: {str(e)}]",
	            [],
	            f"```python\n# Execution Trace\n{str(e)}\n```",
	            None,
	        )

	    # Step 2: System Architecture Payload Formatting
	    system_prompt = (
	        "You are an authoritative backend systems architect. Analyze the provided context "
	        "and output a strict JSON object with exactly three keys: 'summary' (string), 'tasks' (list of lists "
	        "where each item is [Task Name, Priority Low/Medium/High, Status Context]), and 'code' (clean markdown script or schema block).\n"
	        "Do not include conversational filler. Return ONLY valid JSON."
	    )
	    user_content = f"Workflow Class: {workflow_type}\nContext: {transcript}\nModifiers: {extra_instructions}"

	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": user_content},
	    ]

	    try:
	        inputs = llm_tokenizer.apply_chat_template(
	            messages,
	            add_generation_prompt=True,
	            tokenize=True,
	            return_dict=True,
	            return_tensors="pt",
	        ).to("cuda")

	        with torch.no_grad():
	            outputs = llm_model.generate(
	                **inputs,
	                max_new_tokens=1024,
	                do_sample=True,
	                temperature=0.1,
	                top_p=0.95,
	            )

	        # Keep only the newly generated tokens (drop the echoed prompt).
	        response_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
	        raw_output = llm_tokenizer.decode(response_tokens, skip_special_tokens=True)

	        # Break the raw text into the three UI components, tolerating a
	        # payload whose shape does not match what was requested.
	        parsed_data = clean_and_parse_json(raw_output)
	        if not isinstance(parsed_data, dict):
	            parsed_data = {"code": raw_output}
	        summary = _as_text(parsed_data.get("summary", "No summary processed."))
	        tasks = _normalize_tasks(parsed_data.get("tasks", []))
	        code_block = _as_text(parsed_data.get("code", "# No artifacts compiled."))

	        # Write the export into a per-request temp directory so concurrent
	        # requests cannot overwrite each other's file; UTF-8 keeps non-ASCII
	        # transcripts independent of the host locale.
	        export_dir = tempfile.mkdtemp(prefix="fone_")
	        output_filename = os.path.join(export_dir, "fone_architecture_spec.md")
	        with open(output_filename, "w", encoding="utf-8") as f:
	            f.write(f"# FONE SPECIFICATION\n\n## Audio Transcript\n{transcript}\n\n## Scope Summary\n{summary}")

	        return summary, tasks, code_block, output_filename
	    except Exception as e:
	        return f"LLM Generation Failure: {str(e)}", [], f"```python\n# Execution Trace\n{str(e)}\n```", None

	# -------------------------------------------------------------------------
	# INTERFACE ORCHESTRATION (GRADIO 6 BLOCK COMPLIANT)
	# -------------------------------------------------------------------------
	# NOTE(review): nesting below is reconstructed from the `with` statements of a
	# whitespace-stripped copy - confirm it matches the intended layout.
	_LANGUAGE_CHOICES = ["en", "fr", "es", "de", "ar", "ja"]
	_WORKFLOW_CHOICES = [
	    "Feature Engineering Specification",
	    "Database Schema Map",
	    "Automated System Scripts",
	]
	_TASK_HEADERS = ["Objective / Component", "Priority Rank", "Status Context"]

	with gr.Blocks(title="fone // Sovereign Workspace") as demo:
	    gr.Markdown("## 🎛️ fone // Voice Architecture Pipeline")
	    gr.Markdown("Decentralized hardware orchestration running on native containerized ZeroGPU frames.")

	    with gr.Row():
	        # Left column: audio input, routing options and the export slot.
	        with gr.Column(scale=1):
	            gr.Markdown("### 📥 Input Core")
	            audio_feed = gr.Audio(type="filepath", label="Voice Master Input")

	            with gr.Row():
	                lang_selector = gr.Dropdown(
	                    choices=_LANGUAGE_CHOICES,
	                    value="en",
	                    label="Input Language",
	                )
	                workflow_selector = gr.Dropdown(
	                    choices=_WORKFLOW_CHOICES,
	                    value=_WORKFLOW_CHOICES[0],
	                    label="Routing Class",
	                )

	            instruction_overlay = gr.Textbox(
	                label="Execution Modifiers",
	                placeholder="e.g., Target strict Tailwind configurations...",
	            )
	            trigger_btn = gr.Button("Execute Pipeline Trace", variant="primary")
	            file_download = gr.File(label="Exported System Artifacts")

	        # Right column: one tab per pipeline output.
	        with gr.Column(scale=2):
	            gr.Markdown("### 📤 Orchestration Hub")
	            with gr.Tabs():
	                with gr.TabItem("System Summary"):
	                    summary_display = gr.Textbox(
	                        label="Extracted Scope",
	                        lines=8,
	                        interactive=False,
	                    )
	                with gr.TabItem("Task Allocation Matrix"):
	                    # column_count replaces the deprecated col_count (Gradio 6).
	                    task_matrix = gr.Dataframe(
	                        headers=_TASK_HEADERS,
	                        datatype=["str"] * 3,
	                        row_count=5,
	                        column_count=(3, "fixed"),
	                    )
	                with gr.TabItem("Code & Schema Artifacts"):
	                    code_display = gr.Code(
	                        language="markdown",
	                        label="Isolated Scripts",
	                        lines=15,
	                    )

	    # Output order matches the 4-tuple returned by run_pipeline.
	    pipeline_inputs = [audio_feed, lang_selector, workflow_selector, instruction_overlay]
	    pipeline_outputs = [summary_display, task_matrix, code_display, file_download]
	    trigger_btn.click(fn=run_pipeline, inputs=pipeline_inputs, outputs=pipeline_outputs)

	if __name__ == "__main__":
	    # The theme is supplied to launch() here, not to the gr.Blocks() constructor.
	    monochrome_theme = gr.themes.Monochrome()
	    demo.launch(theme=monochrome_theme)