import ollama
import os
import base64  # Used to encode audio uploads for the Ollama API
import logging
from fastapi import FastAPI, Request, HTTPException, UploadFile, File
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from langchain_community.llms import Ollama
import asyncio
import json
import httpx

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI()

# IMPORTANT: Set Ollama host for both langchain and direct httpx calls
OLLAMA_HOST_URL = "http://127.0.0.1:11434"  # Ollama runs locally within the Docker container
os.environ["OLLAMA_HOST"] = OLLAMA_HOST_URL

MODEL_NAME = 'krishna_choudhary/tinyllama:latest'  # For your LLM
WHISPER_MODEL_NAME = 'whisper:tiny'  # For transcription

# Mount static files for the React frontend
app.mount("/assets", StaticFiles(directory="frontend/dist/assets"), name="assets")
# Serve the built React frontend at the root path.
@app.get("/", response_class=HTMLResponse)  # Route decorator assumed; it was missing from the source
async def serve_frontend():
    try:
        with open("frontend/dist/index.html", "r") as f:
            return HTMLResponse(f.read())
    except FileNotFoundError:
        logger.error("frontend/dist/index.html not found. Have you run `npm run build` in your frontend directory?")
        raise HTTPException(status_code=404, detail="Frontend index.html not found. Please ensure React build is complete.")
def get_llm():
    return Ollama(model=MODEL_NAME)

class Question(BaseModel):
    text: str
@app.post("/ask")  # Route path assumed; the decorator was missing from the source
async def ask_question(question: Question):
    try:
        llm = get_llm()
        logger.info(f"Received prompt: {question.text}")

        async def generate_and_stream():
            try:
                # Stream the LLM output as Server-Sent Events, one character
                # at a time with a small delay for a smooth typing effect.
                async for chunk in llm.astream(question.text):
                    for char in chunk:
                        yield f"data: {json.dumps({'token': char})}\n\n"
                        await asyncio.sleep(0.01)
                yield "data: {\"event\": \"end\"}\n\n"
            except Exception as e:
                logger.error(f"Error during Ollama LLM stream: {e}", exc_info=True)
                yield f"data: {json.dumps({'event': 'error', 'error': str(e)})}\n\n"
                yield "data: {\"event\": \"end\"}\n\n"

        return StreamingResponse(generate_and_stream(), media_type="text/event-stream")
    except Exception as e:
        logger.error(f"Error preparing streaming response for LLM: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
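# Illustrative only: a minimal client sketch for consuming the SSE stream
# emitted above. The "/ask" path mirrors the assumed route decorator, and the
# base_url default is a placeholder, not something defined by this app.
async def example_stream_client(prompt: str, base_url: str = "http://127.0.0.1:8000"):
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", f"{base_url}/ask", json={"text": prompt}) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue  # Skip blank SSE separator lines
                payload = json.loads(line[len("data: "):])
                if payload.get("event") == "end":
                    break
                print(payload.get("token", ""), end="", flush=True)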
@app.post("/transcribe")  # Route path assumed; the decorator was missing from the source
async def transcribe_audio(audio_file: UploadFile = File(...)):
    """
    Receives an audio file, sends it to Ollama's Whisper model for transcription,
    and returns the transcribed text.
    """
    if not audio_file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an audio file.")

    # Read the upload exactly once: UploadFile's stream is exhausted after the
    # first read, so these bytes are reused for both the temp copy and the request.
    audio_bytes = await audio_file.read()

    # Keep a temporary copy so the raw audio is inspectable if transcription fails.
    temp_audio_path = f"/tmp/{audio_file.filename}"  # Use /tmp for temporary files
    try:
        with open(temp_audio_path, "wb") as f:
            f.write(audio_bytes)

        logger.info(f"Sending audio file {temp_audio_path} to Ollama for transcription.")

        # The request attaches the audio as a base64-encoded string, following
        # the binary-attachment convention of Ollama's /api/generate endpoint.
        encoded_audio = base64.b64encode(audio_bytes).decode("utf-8")
        ollama_transcribe_payload = {
            "model": WHISPER_MODEL_NAME,
            "prompt": "",
            "stream": False,
            "options": {},
            "images": [encoded_audio],
        }

        async with httpx.AsyncClient() as client:
            ollama_response = await client.post(
                f"{OLLAMA_HOST_URL}/api/generate",
                json=ollama_transcribe_payload,
                timeout=600,
            )
            ollama_response.raise_for_status()
            response_data = ollama_response.json()

        transcription = response_data.get("response", "").strip()
        if not transcription:
            logger.warning("Ollama Whisper returned empty transcription.")
            raise HTTPException(status_code=500, detail="Failed to get transcription from Whisper model.")

        return JSONResponse(content={"transcription": transcription})
    except httpx.RequestError as e:
        logger.error(f"Network error communicating with Ollama: {e}", exc_info=True)
        raise HTTPException(status_code=503, detail=f"Could not connect to Ollama service: {str(e)}")
    except httpx.HTTPStatusError as e:
        logger.error(f"Ollama API returned an error: {e.response.status_code} - {e.response.text}", exc_info=True)
        raise HTTPException(status_code=e.response.status_code, detail=f"Ollama API error: {e.response.text}")
    except HTTPException:
        raise  # Preserve HTTP errors raised above instead of re-wrapping them as 500s
    except Exception as e:
        logger.error(f"Error during audio transcription: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
    finally:
        # Remove the temporary audio file after processing
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
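# Illustrative only: how a client might call the transcription endpoint,
# assuming the "/transcribe" path from the assumed decorator above. The
# multipart field name must match the parameter name `audio_file`.
async def example_transcribe_client(path: str, base_url: str = "http://127.0.0.1:8000"):
    async with httpx.AsyncClient(timeout=600) as client:
        with open(path, "rb") as f:
            files = {"audio_file": (os.path.basename(path), f, "audio/wav")}
            resp = await client.post(f"{base_url}/transcribe", files=files)
    resp.raise_for_status()
    return resp.json()["transcription"]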
@app.on_event("startup")  # Decorator assumed; it was missing from the source
async def startup_event():
    logger.info(f"Starting up with LLM model: {MODEL_NAME} and Whisper model: {WHISPER_MODEL_NAME}")
    client = ollama.AsyncClient(host=OLLAMA_HOST_URL)
    try:
        await client.list()  # Check if Ollama is responsive
        logger.info("Ollama server is accessible.")
    except Exception as e:
        logger.error(f"Ollama server not accessible at startup: {e}")

@app.on_event("shutdown")  # Decorator assumed; it was missing from the source
async def shutdown_event():
    logger.info("Shutting down FastAPI application.")