helloperson123 committed on
Commit
ca791bd
·
verified ·
1 Parent(s): 413c01e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+ from fastapi import FastAPI, Request
5
+ from fastapi.middleware.cors import CORSMiddleware
6
+ import uvicorn
7
+
8
# -------------------------------
# SETTINGS
# -------------------------------
MODEL_NAME = "OpenAssistant/oasst-sft-4-pythia-12b"  # Example open-source model
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_TOKENS = 512  # max new tokens the model may generate per response

# System prompt: defines how the AI should behave.
# Fixed typo: "caled" -> "called" (the misspelling was sent to the model verbatim).
SYSTEM_PROMPT = """
You are an expert AI assistant called skibidibot made by the best company called poopoobois
Answer user questions clearly, concisely, and provide Python code examples when relevant.
Always think step by step for reasoning and math problems.
"""
21
+
22
# -------------------------------
# LOAD MODEL
# -------------------------------
# Download (on first run) and load the tokenizer and model weights.
print(f"Loading {MODEL_NAME} on {DEVICE}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # fp16 halves GPU memory; fp32 is the safe choice for CPU inference
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
).to(DEVICE)
# Switch to inference mode: disables dropout and other training-only behavior.
# Without this, sampled outputs are degraded by active dropout layers.
model.eval()
print("Model loaded!")
32
+
33
# -------------------------------
# CREATE API
# -------------------------------
app = FastAPI()

# Permit cross-origin requests from any host so browser clients can call
# the API directly. Lock `allow_origins` down before deploying to production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
43
+
44
@app.post("/api/ask")
async def ask_ai(request: Request):
    """Answer a user question with the loaded causal LM.

    Expects a JSON body like ``{"prompt": "..."}``; a missing key defaults
    to the empty string. Returns ``{"reply": "<generated text>"}``.

    NOTE(review): generation still runs synchronously inside this async
    handler, blocking the event loop for the duration of the request —
    consider ``run_in_executor`` if concurrent requests matter.
    """
    data = await request.json()
    user_prompt = data.get("prompt", "")

    # Combine system prompt + user input into a single chat-style prompt.
    full_prompt = SYSTEM_PROMPT + "\nUser: " + user_prompt + "\nAI:"

    # Tokenize input and move tensors to the model's device.
    inputs = tokenizer(full_prompt, return_tensors="pt").to(DEVICE)

    # Generate response. no_grad avoids building an autograd graph,
    # which would waste memory during pure inference.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_TOKENS,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

    # Decode only the newly generated tokens. Slicing by input length is
    # robust, unlike ``reply.replace(full_prompt, "")``, which silently
    # fails to strip the prompt whenever tokenize->decode does not
    # round-trip the prompt text byte-for-byte.
    prompt_len = inputs["input_ids"].shape[-1]
    reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    return {"reply": reply}
70
+
71
# -------------------------------
# RUN SERVER
# -------------------------------
if __name__ == "__main__":
    # Bind on every interface; 7860 is the conventional Hugging Face Spaces port.
    serve_host, serve_port = "0.0.0.0", 7860
    uvicorn.run(app, host=serve_host, port=serve_port)