import asyncio
import base64
import json
import logging
import os

import httpx
import ollama
from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from langchain_community.llms import Ollama
from pydantic import BaseModel

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI()

# IMPORTANT: Set Ollama host for both langchain and direct httpx calls
OLLAMA_HOST_URL = "http://127.0.0.1:11434" # Ollama runs locally within the Docker container
os.environ["OLLAMA_HOST"] = OLLAMA_HOST_URL

MODEL_NAME = 'krishna_choudhary/tinyllama:latest' # For your LLM
WHISPER_MODEL_NAME = 'whisper:tiny' # For transcription
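# Both models are assumed to already be present in the local Ollama instance
# (pulled beforehand, e.g. with `ollama pull <model>`).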

# Mount static files for the React frontend
app.mount("/assets", StaticFiles(directory="frontend/dist/assets"), name="assets")

@app.get("/", response_class=HTMLResponse)
async def serve_frontend():
    try:
        with open("frontend/dist/index.html", "r") as f:
            return HTMLResponse(f.read())
    except FileNotFoundError:
        logger.error("frontend/dist/index.html not found. Have you run `npm run build` in your frontend directory?")
        raise HTTPException(status_code=404, detail="Frontend index.html not found. Please ensure React build is complete.")

def get_llm():
    return Ollama(model=MODEL_NAME)

class Question(BaseModel):
    text: str

@app.post("/ask")
async def ask_question(question: Question):
    try:
        llm = get_llm()
        logger.info(f"Received prompt: {question.text}")
        async def generate_and_stream():
            try:
                async for chunk in llm.astream(question.text):
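                    # Re-emit each chunk one character at a time, pacing the
                    # SSE events slightly to give the client a typewriter effect.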
                    for char in chunk:
                        yield f"data: {json.dumps({'token': char})}\n\n"
                        await asyncio.sleep(0.01)
                yield "data: {\"event\": \"end\"}\n\n"
            except Exception as e:
                logger.error(f"Error during Ollama LLM stream: {e}", exc_info=True)
                yield f"data: {json.dumps({'event': 'error', 'error': str(e)})}\n\n"
                yield "data: {\"event\": \"end\"}\n\n"
        return StreamingResponse(generate_and_stream(), media_type="text/event-stream")
    except Exception as e:
        logger.error(f"Error preparing streaming response for LLM: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
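
# Example request against the /ask stream (host/port assumed; -N keeps curl
# from buffering the SSE tokens):
#   curl -N -X POST http://localhost:8000/ask \
#        -H "Content-Type: application/json" \
#        -d '{"text": "What is FastAPI?"}'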

@app.post("/transcribe-audio")
async def transcribe_audio(audio_file: UploadFile = File(...)):
    """
    Receives an audio file, sends it to Ollama's Whisper model for transcription,
    and returns the transcribed text.
    """
    if not audio_file.content_type or not audio_file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an audio file.")

    try:
        # Read the upload exactly once: a second read() on an UploadFile
        # returns empty bytes unless the stream is seeked back to the start.
        audio_bytes = await audio_file.read()
        encoded_audio = base64.b64encode(audio_bytes).decode("utf-8")

        logger.info(f"Sending audio file {audio_file.filename} to Ollama for transcription.")

        # The audio is base64-encoded and sent inline in the generate payload,
        # so no temporary file or multipart upload is needed.
        ollama_transcribe_payload = {
            "model": WHISPER_MODEL_NAME,
            "prompt": "",
            "stream": False,
            "options": {},
            "images": [encoded_audio]
        }

        async with httpx.AsyncClient() as client:
            ollama_response = await client.post(
                f"{OLLAMA_HOST_URL}/api/generate",
                json=ollama_transcribe_payload,
                timeout=600
            )
            ollama_response.raise_for_status()

        response_data = ollama_response.json()
        transcription = response_data.get("response", "").strip()

        if not transcription:
            logger.warning("Ollama Whisper returned empty transcription.")
            raise HTTPException(status_code=500, detail="Failed to get transcription from Whisper model.")

        return JSONResponse(content={"transcription": transcription})

    except httpx.RequestError as e:
        logger.error(f"Network error communicating with Ollama: {e}", exc_info=True)
        raise HTTPException(status_code=503, detail=f"Could not connect to Ollama service: {str(e)}")
    except httpx.HTTPStatusError as e:
        logger.error(f"Ollama API returned an error: {e.response.status_code} - {e.response.text}", exc_info=True)
        raise HTTPException(status_code=e.response.status_code, detail=f"Ollama API error: {e.response.text}")
    except HTTPException:
        # Re-raise HTTP errors (e.g. the empty-transcription 500) unchanged
        # instead of letting the generic handler re-wrap them.
        raise
    except Exception as e:
        logger.error(f"Error during audio transcription: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
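
# Example request (host/port assumed; the multipart field name must match the
# `audio_file` parameter):
#   curl -X POST http://localhost:8000/transcribe-audio \
#        -F "audio_file=@recording.wav;type=audio/wav"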

@app.on_event("startup")
async def startup_event():
    logger.info(f"Starting up with LLM model: {MODEL_NAME} and Whisper model: {WHISPER_MODEL_NAME}")
    client = ollama.AsyncClient(host=OLLAMA_HOST_URL)
    try:
        await client.list() # Check if Ollama is responsive
        logger.info("Ollama server is accessible.")
    except Exception as e:
        logger.error(f"Ollama server not accessible at startup: {e}")

@app.on_event("shutdown")
async def shutdown_event():
    logger.info("Shutting down FastAPI application.")
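
# Local development entry point (a sketch; the module name "main" and port
# 8000 are assumptions, matching a typical `uvicorn main:app` launch).
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)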