from fastapi import FastAPI, File, UploadFile, Form from fastapi.responses import HTMLResponse, JSONResponse import torch import torchaudio from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForCausalLM app = FastAPI() # Load IBM Granite models once on startup SPEECH_MODEL = "ibm-granite/granite-speech-3.3-8b" LLM_MODEL = "ibm-granite/granite-3.3-8b-instruct" speech_processor = AutoProcessor.from_pretrained(SPEECH_MODEL) speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(SPEECH_MODEL).to("cpu") tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL) llm_model = AutoModelForCausalLM.from_pretrained(LLM_MODEL).to("cpu") @app.get("/", response_class=HTMLResponse) def home(): return """

Meeting Memory Workflow Automation

""" @app.post("/transcribe") async def transcribe(audiofile: UploadFile = File(...)): audio_bytes = await audiofile.read() import io wav = io.BytesIO(audio_bytes) audio, sample_rate = torchaudio.load(wav) inputs = speech_processor(audio, sampling_rate=sample_rate, return_tensors="pt").to(speech_model.device) generated_ids = speech_model.generate(**inputs) transcript = speech_processor.batch_decode(generated_ids, skip_special_tokens=True)[0] # Summarize prompt = f"Summarize the following text: {transcript}" inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device) summary_ids = llm_model.generate(**inputs, max_new_tokens=200) summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True).replace(prompt, "").strip() html = f"""

Transcript

{transcript}

Summary

{summary}
Back """ return HTMLResponse(content=html) # Add any orchestration/agent endpoints as needed!