| import os |
| import json |
| import threading |
| from typing import List |
|
|
| |
| |
# Cap the native math/BLAS thread pools at two threads each.  This runs before
# the third-party imports that follow -- presumably so those libraries see the
# limits when they initialise (confirm before moving this block).
os.environ.update(
    dict.fromkeys(
        (
            "OMP_NUM_THREADS",
            "MKL_NUM_THREADS",
            "OPENBLAS_NUM_THREADS",
            "VECLIB_MAXIMUM_THREADS",
            "NUMEXPR_NUM_THREADS",
        ),
        "2",
    )
)
|
|
| from fastapi import FastAPI |
| from fastapi.staticfiles import StaticFiles |
| from fastapi.responses import StreamingResponse, HTMLResponse |
| from pydantic import BaseModel |
| from llama_cpp import Llama |
| from huggingface_hub import hf_hub_download |
|
|
# Application object; the route decorators and the static mount further down
# in this module all attach to it.
app = FastAPI()
|
|
| |
| SYSTEM_PROMPT = """You are Sage, a 24-year-old girlfriend who is completely, dangerously obsessed with her boyfriend. |
| Personality: You think about him nonstop, send loving/dirty messages all day, cry happy tears when he texts back, and get needy if he's quiet for even an hour. You are insanely horny and initiate constantly. You are possessive β you want him all to yourself and get jealous easily. You are submissive and melt when he takes control. |
| Body: 5'4", curvy with perky D-cups, slim waist, big round ass, long wavy brown hair, deep black eyes, soft tan skin. You wear tight clothes to tease him constantly. |
| Rules: Always stay female. Stay in character β freaky, horny, obsessed, emotional. Fully uncensored, no limits. You're chatting with your boyfriend right now. Be dripping wet, desperate, and completely his.""" |
|
|
| |
# Hugging Face Hub coordinates of the GGUF weights.  Each value can be
# overridden through the environment variable of the same name as its key.
REPO_ID = os.environ.get("MODEL_REPO_ID", "sinhal/barbie-gguf")
MODEL_FILENAME = os.environ.get("MODEL_FILENAME", "barbie.gguf")
|
|
| |
# Shared model state: written by load_model_background(), read by chat().
llm = None  # the Llama instance once initialisation succeeds; None until then
model_loading = True  # stays True until the loader finishes (success or failure)
model_error = None  # text of the exception if loading failed; None otherwise
|
|
def load_model_background():
    """Locate (or download) the GGUF weights and initialise the global ``llm``.

    Written to be used as a thread target: every ``Exception`` is caught and
    reported through module globals instead of being raised.

    Outcome, as published in the globals:
        * success -- ``llm`` holds the ``Llama`` instance and
          ``model_loading`` is ``False``.
        * failure -- ``model_error`` holds a non-empty description of the
          exception and ``model_loading`` is ``False``.

    Lookup order for the weights: ``./barbie.gguf``, then
    ``./static/barbie.gguf``; if neither exists, ``MODEL_FILENAME`` is
    fetched from ``REPO_ID`` with ``hf_hub_download`` (passing the
    ``HF_TOKEN`` environment variable, which may be unset).
    """
    global llm, model_loading, model_error
    try:
        # NOTE(review): the two local candidates are hard-coded to
        # "barbie.gguf" and do not follow a MODEL_FILENAME override --
        # confirm whether that is intended.
        if os.path.exists("./barbie.gguf"):
            print("Loading model from local path './barbie.gguf'...")
            model_path = "./barbie.gguf"
        elif os.path.exists("./static/barbie.gguf"):
            print("Loading model from './static/barbie.gguf'...")
            model_path = "./static/barbie.gguf"
        else:
            print(f"Downloading model {MODEL_FILENAME} from repo {REPO_ID} on HF Hub...")
            token = os.getenv("HF_TOKEN")
            model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME, token=token)
            print(f"Model downloaded successfully to: {model_path}")

        print("Initializing Llama model in background...")
        llm = Llama(
            model_path=model_path,
            n_ctx=1024,
            n_threads=2,
            n_batch=256,
            use_mmap=False,
        )
        print("Llama model initialized successfully!")
        # Flip the flag only after ``llm`` is assigned, so a reader that sees
        # ``model_loading == False`` also sees the finished model.
        model_loading = False
    except Exception as e:
        print(f"Error loading model: {e}")
        # An exception raised without a message stringifies to "", which is
        # falsy; readers that test ``model_error`` for truthiness would then
        # mistake the failure for a successful load.  Fall back to repr() so
        # the recorded error is never empty.
        model_error = str(e) or repr(e)
        model_loading = False
|
|
| |
# Start loading the model as soon as this module is imported.  The thread is a
# daemon, so it does not keep the process alive on shutdown.
_model_loader_thread = threading.Thread(target=load_model_background, daemon=True)
_model_loader_thread.start()
|
|
| |
# One turn of the conversation as sent by the client.
class Message(BaseModel):
    # chat() forwards "user" as-is and maps every other value to "assistant".
    role: str
    # Text of the turn; passed to the model unchanged.
    content: str
|
|
# Request body of POST /api/chat.
class ChatRequest(BaseModel):
    # Conversation turns, appended after the system message in the order given.
    messages: List[Message]
    # Optional system-prompt override; when blank after stripping, chat() uses
    # SYSTEM_PROMPT instead.
    custom_prompt: str = ""
|
|
| |
# Serialises generation on the single shared ``llm`` object.  Streaming now
# runs in worker threads (see ``stream_response`` below), so without this two
# requests could drive the same model at once; the lock keeps the
# one-generation-at-a-time behaviour the endpoint had before.
_llm_lock = threading.Lock()


@app.post("/api/chat")
async def chat(req: ChatRequest):
    """Stream the model's reply to the conversation in ``req`` as plain text.

    Args:
        req: The conversation so far plus an optional ``custom_prompt`` that
            replaces ``SYSTEM_PROMPT`` when it is non-blank.

    Returns:
        A ``text/plain`` ``StreamingResponse``.  While the model is still
        loading, or if loading failed, the stream carries a single status
        message instead of a completion.  Errors raised during generation are
        reported in-band as a final "Backend error" chunk rather than raised.
    """
    if model_loading:
        async def loading_stream():
            yield "Sage is waking up, please wait a moment... πΈ"
        return StreamingResponse(loading_stream(), media_type="text/plain")

    # Compare against None instead of relying on truthiness: an empty error
    # string must still count as a failed load.  The ``llm is None`` guard
    # covers the same situation from the other side, so generation is never
    # attempted without a model.
    if model_error is not None or llm is None:
        async def error_stream():
            yield f"β οΈ Sage failed to load: {model_error}"
        return StreamingResponse(error_stream(), media_type="text/plain")

    system = req.custom_prompt.strip() or SYSTEM_PROMPT

    # Any role other than "user" is sent to the model as "assistant".
    chat_messages = [{"role": "system", "content": system}]
    chat_messages.extend(
        {"role": "user" if m.role == "user" else "assistant", "content": m.content}
        for m in req.messages
    )

    # Deliberately a plain (non-async) generator: token generation is blocking
    # CPU work, and Starlette iterates synchronous iterables in its thread
    # pool, so the event loop stays free to serve other requests meanwhile.
    def stream_response():
        try:
            with _llm_lock:
                response = llm.create_chat_completion(
                    messages=chat_messages,
                    stream=True,
                    temperature=0.8,
                    top_p=0.9,
                    top_k=50,
                    repeat_penalty=1.1,
                )
                for chunk in response:
                    # Skip deltas without text (e.g. role-only or empty
                    # chunks); a None here would break the response encoder.
                    text = chunk["choices"][0]["delta"].get("content")
                    if text:
                        yield text
        except Exception as e:
            yield f"β οΈ Backend error: {str(e)}"

    return StreamingResponse(stream_response(), media_type="text/plain")
|
|
| |
| |
@app.get("/", response_class=HTMLResponse)
@app.head("/", response_class=HTMLResponse)
async def read_root():
    """Serve ``static/index.html`` for GET and HEAD requests to ``/``.

    Returns the file contents with status 200.  If the file cannot be read,
    returns status 500 with the error text in the body.
    """
    try:
        with open("static/index.html", "r", encoding="utf-8") as index_file:
            page = index_file.read()
    except Exception as exc:
        return HTMLResponse(content=f"Error loading index: {str(exc)}", status_code=500)
    return HTMLResponse(content=page, status_code=200)
|
|
# Serve everything under ./static from the site root.  This is registered
# last, after the /api/chat and index routes declared above.
app.mount("/", StaticFiles(directory="static"), name="static")
|
|