import ollama
import os
import base64  # Used to encode audio uploads for the Ollama API
import logging
from fastapi import FastAPI, Request, HTTPException, UploadFile, File
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from langchain_community.llms import Ollama
import asyncio
import json
import httpx

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI()

# IMPORTANT: Set Ollama host for both langchain and direct httpx calls
OLLAMA_HOST_URL = "http://127.0.0.1:11434"  # Ollama runs locally within the Docker container
os.environ["OLLAMA_HOST"] = OLLAMA_HOST_URL

MODEL_NAME = 'krishna_choudhary/tinyllama:latest'  # For your LLM
WHISPER_MODEL_NAME = 'whisper:tiny'  # For transcription

# Mount static files for the React frontend
app.mount("/assets", StaticFiles(directory="frontend/dist/assets"), name="assets")
# Serve the built React frontend at the root path.
@app.get("/", response_class=HTMLResponse)  # Route decorator assumed; it was missing from the source
async def serve_frontend():
    try:
        with open("frontend/dist/index.html", "r") as f:
            return HTMLResponse(f.read())
    except FileNotFoundError:
        logger.error("frontend/dist/index.html not found. Have you run `npm run build` in your frontend directory?")
        raise HTTPException(status_code=404, detail="Frontend index.html not found. Please ensure React build is complete.")
def get_llm():
    return Ollama(model=MODEL_NAME)

class Question(BaseModel):
    text: str
@app.post("/ask")  # Route path assumed; the decorator was missing from the source
async def ask_question(question: Question):
    try:
        llm = get_llm()
        logger.info(f"Received prompt: {question.text}")

        async def generate_and_stream():
            try:
                # Stream the LLM output as Server-Sent Events, one character
                # at a time with a small delay for a smooth typing effect.
                async for chunk in llm.astream(question.text):
                    for char in chunk:
                        yield f"data: {json.dumps({'token': char})}\n\n"
                        await asyncio.sleep(0.01)
                yield "data: {\"event\": \"end\"}\n\n"
            except Exception as e:
                logger.error(f"Error during Ollama LLM stream: {e}", exc_info=True)
                yield f"data: {json.dumps({'event': 'error', 'error': str(e)})}\n\n"
                yield "data: {\"event\": \"end\"}\n\n"

        return StreamingResponse(generate_and_stream(), media_type="text/event-stream")
    except Exception as e:
        logger.error(f"Error preparing streaming response for LLM: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
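# Illustrative only: a minimal client sketch for consuming the SSE stream
# emitted above. The "/ask" path mirrors the assumed route decorator, and the
# base_url default is a placeholder, not something defined by this app.
async def example_stream_client(prompt: str, base_url: str = "http://127.0.0.1:8000"):
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", f"{base_url}/ask", json={"text": prompt}) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue  # Skip blank SSE separator lines
                payload = json.loads(line[len("data: "):])
                if payload.get("event") == "end":
                    break
                print(payload.get("token", ""), end="", flush=True)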
@app.post("/transcribe")  # Route path assumed; the decorator was missing from the source
async def transcribe_audio(audio_file: UploadFile = File(...)):
    """
    Receives an audio file, sends it to Ollama's Whisper model for transcription,
    and returns the transcribed text.
    """
    if not audio_file.content_type.startswith("audio/"):
        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an audio file.")

    # Read the upload exactly once: UploadFile's stream is exhausted after the
    # first read, so these bytes are reused for both the temp copy and the request.
    audio_bytes = await audio_file.read()

    # Keep a temporary copy so the raw audio is inspectable if transcription fails.
    temp_audio_path = f"/tmp/{audio_file.filename}"  # Use /tmp for temporary files
    try:
        with open(temp_audio_path, "wb") as f:
            f.write(audio_bytes)

        logger.info(f"Sending audio file {temp_audio_path} to Ollama for transcription.")

        # The request attaches the audio as a base64-encoded string, following
        # the binary-attachment convention of Ollama's /api/generate endpoint.
        encoded_audio = base64.b64encode(audio_bytes).decode("utf-8")
        ollama_transcribe_payload = {
            "model": WHISPER_MODEL_NAME,
            "prompt": "",
            "stream": False,
            "options": {},
            "images": [encoded_audio],
        }

        async with httpx.AsyncClient() as client:
            ollama_response = await client.post(
                f"{OLLAMA_HOST_URL}/api/generate",
                json=ollama_transcribe_payload,
                timeout=600,
            )
            ollama_response.raise_for_status()
            response_data = ollama_response.json()

        transcription = response_data.get("response", "").strip()
        if not transcription:
            logger.warning("Ollama Whisper returned empty transcription.")
            raise HTTPException(status_code=500, detail="Failed to get transcription from Whisper model.")

        return JSONResponse(content={"transcription": transcription})
    except httpx.RequestError as e:
        logger.error(f"Network error communicating with Ollama: {e}", exc_info=True)
        raise HTTPException(status_code=503, detail=f"Could not connect to Ollama service: {str(e)}")
    except httpx.HTTPStatusError as e:
        logger.error(f"Ollama API returned an error: {e.response.status_code} - {e.response.text}", exc_info=True)
        raise HTTPException(status_code=e.response.status_code, detail=f"Ollama API error: {e.response.text}")
    except HTTPException:
        raise  # Preserve HTTP errors raised above instead of re-wrapping them as 500s
    except Exception as e:
        logger.error(f"Error during audio transcription: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
    finally:
        # Remove the temporary audio file after processing
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
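# Illustrative only: how a client might call the transcription endpoint,
# assuming the "/transcribe" path from the assumed decorator above. The
# multipart field name must match the parameter name `audio_file`.
async def example_transcribe_client(path: str, base_url: str = "http://127.0.0.1:8000"):
    async with httpx.AsyncClient(timeout=600) as client:
        with open(path, "rb") as f:
            files = {"audio_file": (os.path.basename(path), f, "audio/wav")}
            resp = await client.post(f"{base_url}/transcribe", files=files)
    resp.raise_for_status()
    return resp.json()["transcription"]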
@app.on_event("startup")  # Decorator assumed; it was missing from the source
async def startup_event():
    logger.info(f"Starting up with LLM model: {MODEL_NAME} and Whisper model: {WHISPER_MODEL_NAME}")
    client = ollama.AsyncClient(host=OLLAMA_HOST_URL)
    try:
        await client.list()  # Check if Ollama is responsive
        logger.info("Ollama server is accessible.")
    except Exception as e:
        logger.error(f"Ollama server not accessible at startup: {e}")

@app.on_event("shutdown")  # Decorator assumed; it was missing from the source
async def shutdown_event():
    logger.info("Shutting down FastAPI application.")