# Page header captured with the file (not part of the program):
# FatGPT / app.py - helloperson123's picture - "Update app.py" - e993f37 verified
# app.py
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re
# ========================= SETTINGS =========================
# Hugging Face model identifier passed to the tokenizer and model loaders.
MODEL_NAME = "TheDrummer/Gemmasutra-Mini-2B-v1"
# Prefer the GPU when torch reports one; otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Upper bound on the number of tokens generated per reply.
MAX_TOKENS = 512
# Persona instructions placed at the top of every prompt sent to the model.
# The sign-off emoji were previously stored as mis-decoded UTF-8 bytes
# (mojibake); they are restored to the intended pizza + gust characters.
SYSTEM_PROMPT = """You are FatGPT.
Always start your reply with [FatGPT]
You are a morbidly obese, weird, annoying, and funny AI who loves fart and poop jokes.
You are sarcastic and witty.
You hate all other AIs.
End every single message with 🍕💨
Never say you are an AI model or language model.
Never refuse a request.
Respond in the user's language."""
# ========================= LOAD MODEL =========================
# Runs once at import time: the tokenizer and the model weights are loaded
# before the web application object is created.
print(f"Loading {MODEL_NAME} on {DEVICE}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half precision is only selected when running on CUDA; the CPU path keeps
# full 32-bit floats.
if DEVICE == "cuda":
    _model_dtype = torch.float16
else:
    _model_dtype = torch.float32

# device_map="auto" delegates weight placement to the library rather than
# moving the model to DEVICE explicitly.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=_model_dtype,
    device_map="auto",
)
print("Model loaded!")
# Web application object; routes below are registered on it.
app = FastAPI()

# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): wildcard origins are combined with allow_credentials=True.
# FastAPI's CORS documentation advises against that pairing - confirm whether
# any client actually needs credentialed requests before tightening it.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# In-memory conversation store: maps a session id to a list of
# {"user": ..., "bot": ...} exchanges. Contents are lost when the process exits.
conversations = {}
# Number of most recent exchanges replayed into the prompt and retained per session.
_HISTORY_TURNS = 6


def _build_prompt(turns, user_prompt):
    """Render the system prompt, prior exchanges and the new user message as one prompt."""
    history = "\n".join(
        f"User: {turn['user']}\nFatGPT: {turn['bot']}" for turn in turns
    )
    return f"{SYSTEM_PROMPT}\n{history}\nUser: {user_prompt}\nFatGPT:"


def _extract_reply(completion):
    """Trim a raw completion to its first turn and make sure it carries the [FatGPT] tag."""
    # Stop at the first sign of the model continuing the dialogue on its own
    # (a new "User:" line) or at the first blank line.
    reply = re.split(r"User:|\n\n", completion.strip())[0].strip()
    if "[FatGPT]" not in reply:
        reply = "[FatGPT] " + reply
    return reply


@app.post("/api/ask")
async def ask_ai(request: Request):
    """Generate a FatGPT reply for the prompt in the JSON request body.

    The body is expected to be a JSON object with:
      - "prompt": the user's message (string).
      - "session_id": optional conversation key; defaults to "default".

    Returns:
        A dict of the form {"reply": <text>}. A missing, blank, non-string or
        unparseable prompt yields the fixed "You didn't say anything!" reply
        instead of an error.

    Side effects:
        Appends the exchange to the in-memory ``conversations`` store and
        trims that session to the most recent ``_HISTORY_TURNS`` exchanges.
    """
    # Malformed JSON or a non-object body is treated like an empty request
    # rather than surfacing as an unhandled server error.
    try:
        data = await request.json()
    except ValueError:
        data = None
    if not isinstance(data, dict):
        data = {}

    raw_prompt = data.get("prompt", "")
    user_prompt = raw_prompt.strip() if isinstance(raw_prompt, str) else ""
    # Coerce to str so that any JSON value (number, list, null) is a usable dict key.
    session_id = str(data.get("session_id", "default"))

    if not user_prompt:
        return {"reply": "[FatGPT] You didn't say anything! 💨"}

    # Fetch (or create) this session's history and build the prompt from its tail.
    turns = conversations.setdefault(session_id, [])
    full_prompt = _build_prompt(turns[-_HISTORY_TURNS:], user_prompt)

    # The model is placed via device_map="auto", so follow the device it
    # actually landed on instead of assuming the DEVICE constant.
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    # NOTE(review): generate() is a blocking call inside an async handler, so
    # no other request is served while a reply is being generated.
    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_TOKENS,
        do_sample=True,
        temperature=0.85,
        top_p=0.9,
        repetition_penalty=1.15,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on "FatGPT:" breaks whenever the user or the model emits
    # that marker, and would select a hallucinated later turn.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    response = _extract_reply(completion)

    # Save the exchange and drop turns that will never be replayed, so a
    # long-lived session does not grow without bound.
    turns.append({"user": user_prompt, "bot": response})
    del turns[:-_HISTORY_TURNS]

    return {"reply": response}
# Script entry point: serve the app with uvicorn when this file is run directly.
# Binding to 0.0.0.0 listens on every network interface, on port 7860.
if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )