import ollama
import os
import logging
from fastapi import FastAPI, Request, HTTPException, UploadFile, File
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from langchain_community.llms import Ollama
import asyncio
import json
import httpx
import base64
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize FastAPI app
app = FastAPI()
# IMPORTANT: Set Ollama host for both langchain and direct httpx calls
OLLAMA_HOST_URL = "http://127.0.0.1:11434" # Ollama runs locally within the Docker container
os.environ["OLLAMA_HOST"] = OLLAMA_HOST_URL
MODEL_NAME = 'krishna_choudhary/tinyllama:latest' # For your LLM
WHISPER_MODEL_NAME = 'whisper:tiny' # For transcription
# Mount static files for the React frontend
app.mount("/assets", StaticFiles(directory="frontend/dist/assets"), name="assets")
@app.get("/", response_class=HTMLResponse)
async def serve_frontend():
    try:
        with open("frontend/dist/index.html", "r") as f:
            return HTMLResponse(f.read())
    except FileNotFoundError:
        logger.error("frontend/dist/index.html not found. Have you run `npm run build` in your frontend directory?")
        raise HTTPException(status_code=404, detail="Frontend index.html not found. Please ensure React build is complete.")
def get_llm():
    return Ollama(model=MODEL_NAME)

class Question(BaseModel):
    text: str
@app.post("/ask")
async def ask_question(question: Question):
    try:
        llm = get_llm()
        logger.info(f"Received prompt: {question.text}")

        async def generate_and_stream():
            try:
                # Re-emit the LLM stream character by character as Server-Sent Events,
                # pausing briefly between characters to pace the stream.
                async for chunk in llm.astream(question.text):
                    for char in chunk:
                        yield f"data: {json.dumps({'token': char})}\n\n"
                        await asyncio.sleep(0.01)
                yield "data: {\"event\": \"end\"}\n\n"
            except Exception as e:
                logger.error(f"Error during Ollama LLM stream: {e}", exc_info=True)
                yield f"data: {json.dumps({'event': 'error', 'error': str(e)})}\n\n"
                yield "data: {\"event\": \"end\"}\n\n"

        return StreamingResponse(generate_and_stream(), media_type="text/event-stream")
    except Exception as e:
        logger.error(f"Error preparing streaming response for LLM: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
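
# Hypothetical client sketch (not part of the app): one way the /ask SSE stream
# above could be consumed from Python with httpx. The base_url and port are
# assumptions for a local run; adjust them to wherever this app is served.
async def _example_ask_client(prompt: str, base_url: str = "http://127.0.0.1:8000"):
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", f"{base_url}/ask", json={"text": prompt}) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                payload = json.loads(line[len("data: "):])
                if payload.get("event") in ("end", "error"):
                    break
                print(payload.get("token", ""), end="", flush=True)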
@app.post("/transcribe-audio")
async def transcribe_audio(audio_file: UploadFile = File(...)):
"""
Receives an audio file, sends it to Ollama's Whisper model for transcription,
and returns the transcribed text.
"""
if not audio_file.content_type.startswith("audio/"):
raise HTTPException(status_code=400, detail="Invalid file type. Please upload an audio file.")
# Ollama's API for transcription expects a file path or bytes to be sent.
# We'll save the uploaded file temporarily and then send it.
temp_audio_path = f"/tmp/{audio_file.filename}" # Use /tmp for temporary files
try:
# Save the uploaded file to a temporary location
with open(temp_audio_path, "wb") as f:
f.write(await audio_file.read())
logger.info(f"Sending audio file {temp_audio_path} to Ollama for transcription.")
async with httpx.AsyncClient() as client:
with open(temp_audio_path, "rb") as f:
files = {"file": (audio_file.filename, f, audio_file.content_type)}
import ollama
audio_bytes = await audio_file.read()
import base64
encoded_audio = base64.b64encode(audio_bytes).decode('utf-8')
ollama_transcribe_payload = {
"model": WHISPER_MODEL_NAME,
"prompt": "",
"stream": False,
"options": {
},
"images": [encoded_audio]
}
ollama_response = await client.post(
f"{OLLAMA_HOST_URL}/api/generate",
json=ollama_transcribe_payload,
timeout=600
)
ollama_response.raise_for_status()
response_data = ollama_response.json()
transcription = response_data.get("response", "").strip()
if not transcription:
logger.warning("Ollama Whisper returned empty transcription.")
raise HTTPException(status_code=500, detail="Failed to get transcription from Whisper model.")
return JSONResponse(content={"transcription": transcription})
except httpx.RequestError as e:
logger.error(f"Network error communicating with Ollama: {e}", exc_info=True)
raise HTTPException(status_code=503, detail=f"Could not connect to Ollama service: {str(e)}")
except httpx.HTTPStatusError as e:
logger.error(f"Ollama API returned an error: {e.response.status_code} - {e.response.text}", exc_info=True)
raise HTTPException(status_code=e.response.status_code, detail=f"Ollama API error: {e.response.text}")
except Exception as e:
logger.error(f"Error during audio transcription: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
finally:
# remove the audio after processing
if os.path.exists(temp_audio_path):
os.remove(temp_audio_path)
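
# Hypothetical client sketch (not part of the app): how a client might upload a
# file to /transcribe-audio as multipart form data. The form field name must match
# the endpoint parameter ("audio_file"); the path, content type, and base_url here
# are assumptions for a local run.
async def _example_transcribe_client(path: str, base_url: str = "http://127.0.0.1:8000"):
    async with httpx.AsyncClient(timeout=600) as client:
        with open(path, "rb") as f:
            files = {"audio_file": (os.path.basename(path), f, "audio/wav")}
            response = await client.post(f"{base_url}/transcribe-audio", files=files)
    response.raise_for_status()
    return response.json()["transcription"]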
@app.on_event("startup")
async def startup_event():
logger.info(f"Starting up with LLM model: {MODEL_NAME} and Whisper model: {WHISPER_MODEL_NAME}")
client = ollama.AsyncClient(host=OLLAMA_HOST_URL)
try:
await client.list() # Check if Ollama is responsive
logger.info("Ollama server is accessible.")
except Exception as e:
logger.error(f"Ollama server not accessible at startup: {e}")
@app.on_event("shutdown")
async def shutdown_event():
logger.info("Shutting down FastAPI application.") |