```python
import logging
from functools import lru_cache

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

MODEL_NAME = "qwen3:0.6b"

@lru_cache()
def get_llm():
    # Build one Ollama client and cache it for reuse across requests;
    # the stdout callback echoes generated tokens to the server console.
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return Ollama(model=MODEL_NAME, callback_manager=callback_manager)

class Question(BaseModel):
    text: str

@app.get("/")
def read_root():
    return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}

@app.post("/ask")
async def ask_question(question: Question):
    try:
        logger.info(f"Received question: {question.text}")
        llm = get_llm()
        response = llm.invoke(question.text)
        logger.info("Response generated successfully")
        return {"answer": response}
    except Exception as e:
        logger.error(f"Error in /ask endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/ask_stream")
async def ask_question_stream(question: Question):
    try:
        logger.info(f"Received question for streaming: {question.text}")
        llm = get_llm()

        async def generate():
            full_response = ""
            async for chunk in llm.astream(question.text):
                full_response += chunk
                yield chunk
            # Log the full response after streaming is complete
            logger.info(f"Full streamed response: {full_response}")

        return StreamingResponse(generate(), media_type="text/plain")
    except Exception as e:
        logger.error(f"Error in /ask_stream endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.on_event("startup")
async def startup_event():
    logger.info(f"Starting up with model: {MODEL_NAME}")
    # Warm up the cache so the first request doesn't pay for client construction
    get_llm()

@app.on_event("shutdown")
async def shutdown_event():
    logger.info("Shutting down")
```