import ollama
import os
import logging
from fastapi import FastAPI, Request, HTTPException, UploadFile, File
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from langchain_community.llms import Ollama
import asyncio
import base64
import json
import httpx

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI()

# IMPORTANT: Set Ollama host for both langchain and direct httpx calls
OLLAMA_HOST_URL = "http://127.0.0.1:11434"  # Ollama runs locally within the Docker container
os.environ["OLLAMA_HOST"] = OLLAMA_HOST_URL

MODEL_NAME = 'krishna_choudhary/tinyllama:latest'  # For your LLM
WHISPER_MODEL_NAME = 'whisper:tiny'  # For transcription

# Mount static files for the React frontend
app.mount("/assets", StaticFiles(directory="frontend/dist/assets"), name="assets")


@app.get("/", response_class=HTMLResponse)
async def serve_frontend():
    try:
        with open("frontend/dist/index.html", "r") as f:
            return HTMLResponse(f.read())
    except FileNotFoundError:
        logger.error("frontend/dist/index.html not found. Have you run `npm run build` in your frontend directory?")
        raise HTTPException(status_code=404, detail="Frontend index.html not found. Please ensure React build is complete.")


def get_llm():
    return Ollama(model=MODEL_NAME)


class Question(BaseModel):
    text: str


@app.post("/ask")
async def ask_question(question: Question):
    try:
        llm = get_llm()
        logger.info(f"Received prompt: {question.text}")

        async def generate_and_stream():
            try:
                async for chunk in llm.astream(question.text):
                    for char in chunk:
                        yield f"data: {json.dumps({'token': char})}\n\n"
                        await asyncio.sleep(0.01)
                yield "data: {\"event\": \"end\"}\n\n"
            except Exception as e:
                logger.error(f"Error during Ollama LLM stream: {e}", exc_info=True)
                yield f"data: {json.dumps({'event': 'error', 'error': str(e)})}\n\n"
                yield "data: {\"event\": \"end\"}\n\n"

        return StreamingResponse(generate_and_stream(), media_type="text/event-stream")
    except Exception as e:
        logger.error(f"Error preparing streaming response for LLM: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")


@app.post("/transcribe-audio")
async def transcribe_audio(audio_file: UploadFile = File(...)):
    """
    Receives an audio file, sends it to Ollama's Whisper model for transcription,
    and returns the transcribed text.
    """
    if not audio_file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an audio file.")

    # Ollama's API for transcription expects a file path or bytes to be sent.
    # We'll save the uploaded file temporarily and then send it.
temp_audio_path = f"/tmp/{audio_file.filename}" # Use /tmp for temporary files try: # Save the uploaded file to a temporary location with open(temp_audio_path, "wb") as f: f.write(await audio_file.read()) logger.info(f"Sending audio file {temp_audio_path} to Ollama for transcription.") async with httpx.AsyncClient() as client: with open(temp_audio_path, "rb") as f: files = {"file": (audio_file.filename, f, audio_file.content_type)} import ollama audio_bytes = await audio_file.read() import base64 encoded_audio = base64.b64encode(audio_bytes).decode('utf-8') ollama_transcribe_payload = { "model": WHISPER_MODEL_NAME, "prompt": "", "stream": False, "options": { }, "images": [encoded_audio] } ollama_response = await client.post( f"{OLLAMA_HOST_URL}/api/generate", json=ollama_transcribe_payload, timeout=600 ) ollama_response.raise_for_status() response_data = ollama_response.json() transcription = response_data.get("response", "").strip() if not transcription: logger.warning("Ollama Whisper returned empty transcription.") raise HTTPException(status_code=500, detail="Failed to get transcription from Whisper model.") return JSONResponse(content={"transcription": transcription}) except httpx.RequestError as e: logger.error(f"Network error communicating with Ollama: {e}", exc_info=True) raise HTTPException(status_code=503, detail=f"Could not connect to Ollama service: {str(e)}") except httpx.HTTPStatusError as e: logger.error(f"Ollama API returned an error: {e.response.status_code} - {e.response.text}", exc_info=True) raise HTTPException(status_code=e.response.status_code, detail=f"Ollama API error: {e.response.text}") except Exception as e: logger.error(f"Error during audio transcription: {e}", exc_info=True) raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}") finally: # remove the audio after processing if os.path.exists(temp_audio_path): os.remove(temp_audio_path) @app.on_event("startup") async def startup_event(): logger.info(f"Starting up with LLM model: {MODEL_NAME} and Whisper model: {WHISPER_MODEL_NAME}") client = ollama.AsyncClient(host=OLLAMA_HOST_URL) try: await client.list() # Check if Ollama is responsive logger.info("Ollama server is accessible.") except Exception as e: logger.error(f"Ollama server not accessible at startup: {e}") @app.on_event("shutdown") async def shutdown_event(): logger.info("Shutting down FastAPI application.")