"""HTTP front end that forwards prompts to a local Ollama server.

Exposes three routes on ``app``:

* ``GET /``          - static welcome message.
* ``POST /generate`` - forwards a prompt to Ollama unchanged.
* ``POST /chat``     - prefixes the prompt with caller-supplied context.
"""

import json

import requests
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool

app = FastAPI()

# Ollama API endpoint (running inside the container)
OLLAMA_API = "http://localhost:11434/api/generate"

# Model tag sent with every request to Ollama.
MODEL_NAME = "hf.co/Chun121/qwen3-4B-rpg-roleplay:Q4_K_M"

# (connect, read) timeouts in seconds. Without a timeout ``requests`` waits
# forever on an unresponsive server. The read limit is deliberately generous
# because a non-streamed generation only answers once the full completion is
# ready; tune it for the hardware the model runs on.
OLLAMA_TIMEOUT = (5, 300)


def _ollama_generate(prompt: str):
    """Send ``prompt`` to Ollama (non-streaming) and return the decoded JSON.

    This function blocks on network I/O, so the async route handlers run it
    in a worker thread instead of calling it directly.

    Raises:
        HTTPException: status 500 if the request fails, times out, or
            Ollama answers with an HTTP error status.
    """
    payload = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,
    }
    try:
        response = requests.post(OLLAMA_API, json=payload, timeout=OLLAMA_TIMEOUT)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error communicating with Ollama: {str(e)}",
        ) from e


@app.get("/")
async def root():
    """Return a static welcome message."""
    return {"message": "Welcome to the custom LLM API for Qwen3-4B RPG Roleplay"}


@app.post("/generate")
async def generate(prompt: str):
    """Forward ``prompt`` to Ollama and return its JSON response.

    ``prompt`` is declared as a plain ``str`` parameter, which FastAPI reads
    from the query string rather than the request body.

    Raises:
        HTTPException: status 500 if communication with Ollama fails.
    """
    # requests is synchronous; running it in the threadpool keeps the event
    # loop free to serve other requests while the model generates.
    return await run_in_threadpool(_ollama_generate, prompt)


@app.post("/chat")
async def chat(prompt: str, context: str = ""):
    """Forward ``prompt`` to Ollama, preceded by ``context``.

    The text sent to the model is ``context``, a newline, then
    ``User: <prompt>``. Both parameters are plain ``str`` values, which
    FastAPI reads from the query string; ``context`` defaults to empty.

    Raises:
        HTTPException: status 500 if communication with Ollama fails.
    """
    full_prompt = f"{context}\nUser: {prompt}"
    # See generate(): the blocking HTTP call is kept off the event loop.
    return await run_in_threadpool(_ollama_generate, full_prompt)