File size: 1,353 Bytes
83ff9b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03d5c0b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from fastapi import FastAPI, HTTPException
import requests
import json

# FastAPI application instance; the route handlers below register against it.
app = FastAPI()

# Ollama HTTP API endpoint (Ollama runs inside the same container, so
# localhost is correct here); /api/generate is the non-chat completion route.
OLLAMA_API = "http://localhost:11434/api/generate"

@app.get("/")
async def root():
    """Landing endpoint: returns a static welcome message as JSON."""
    greeting = "Welcome to the custom LLM API for Qwen3-4B RPG Roleplay"
    return {"message": greeting}

@app.post("/generate")
async def generate(prompt: str):
    """Proxy a single-shot generation request to the local Ollama server.

    Args:
        prompt: Raw prompt text forwarded verbatim to the model.

    Returns:
        The parsed JSON response body from Ollama.

    Raises:
        HTTPException: 500 when Ollama is unreachable, times out, or
            replies with an HTTP error status.
    """
    payload = {
        "model": "hf.co/Chun121/qwen3-4B-rpg-roleplay:Q4_K_M",
        "prompt": prompt,
        "stream": False,
    }
    try:
        # NOTE: requests is a blocking client; without a timeout this call
        # could hang the event-loop worker forever if Ollama stops responding.
        # 120s is generous for local 4B-model generation — tune as needed.
        response = requests.post(OLLAMA_API, json=payload, timeout=120)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        raise HTTPException(status_code=500, detail=f"Error communicating with Ollama: {str(e)}")

@app.post("/chat")
async def chat(prompt: str, context: str = ""):
    """Proxy a chat-style request to Ollama, prepending prior context.

    Args:
        prompt: The user's latest message.
        context: Optional prior conversation text, prepended before the
            "User:" turn so the model sees the running dialogue.

    Returns:
        The parsed JSON response body from Ollama.

    Raises:
        HTTPException: 500 when Ollama is unreachable, times out, or
            replies with an HTTP error status.
    """
    payload = {
        "model": "hf.co/Chun121/qwen3-4B-rpg-roleplay:Q4_K_M",
        "prompt": f"{context}\nUser: {prompt}",
        "stream": False,
    }
    try:
        # NOTE: requests is a blocking client; without a timeout this call
        # could hang the event-loop worker forever if Ollama stops responding.
        response = requests.post(OLLAMA_API, json=payload, timeout=120)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        raise HTTPException(status_code=500, detail=f"Error communicating with Ollama: {str(e)}")