Create app.py

app.py (ADDED)
# app.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
import uvicorn

# -------------------------------
# SETTINGS
# -------------------------------
MODEL_NAME = "OpenAssistant/oasst-sft-4-pythia-12b"  # Example open-source model
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_TOKENS = 512  # Maximum number of new tokens to generate per response

# System prompt: defines how the AI should behave
SYSTEM_PROMPT = """
You are an expert AI assistant called skibidibot, made by the best company called poopoobois.
Answer user questions clearly and concisely, and provide Python code examples when relevant.
Always think step by step for reasoning and math problems.
"""

# -------------------------------
# LOAD MODEL
# -------------------------------
print(f"Loading {MODEL_NAME} on {DEVICE}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
).to(DEVICE)
print("Model loaded!")

# -------------------------------
# CREATE API
# -------------------------------
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow requests from anywhere (tighten in production)
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.post("/api/ask")
async def ask_ai(request: Request):
    data = await request.json()
    user_prompt = data.get("prompt", "")

    # Combine system prompt + user input
    full_prompt = SYSTEM_PROMPT + "\nUser: " + user_prompt + "\nAI:"

    # Tokenize input
    inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)

    # Generate response
    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_TOKENS,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,  # avoids pad-token warnings for models without one
    )

    # Decode only the newly generated tokens. Slicing by the input length is
    # more robust than string-replacing the prompt out of the decoded text,
    # since tokenization round-trips can alter whitespace and break the match.
    reply = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    ).strip()
    return {"reply": reply}

# -------------------------------
# RUN SERVER
# -------------------------------
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
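Once the server is up, the endpoint accepts a POST with a JSON body containing a `prompt` field and returns a JSON object with a `reply` field. Below is a minimal client sketch; the base URL is an assumption (a server running locally on port 7860, as in the `uvicorn.run` call above), so substitute your Space's URL when deployed.

# client.py - minimal sketch for calling the /api/ask endpoint.
# Assumes the app above is running locally on port 7860 (an assumption;
# replace BASE_URL with your Space's URL if it is hosted remotely).
import requests

BASE_URL = "http://localhost:7860"

resp = requests.post(
    f"{BASE_URL}/api/ask",
    json={"prompt": "Write a Python one-liner that reverses a string."},
    timeout=300,  # generation can be slow, especially for a 12B model on CPU
)
resp.raise_for_status()
print(resp.json()["reply"])

Note that a Space running this file will also need its dependencies declared (typically a requirements.txt listing transformers, torch, fastapi, uvicorn, matching the imports above).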