Zenkad committed on
Commit
316c8a8
·
verified ·
1 Parent(s): 35e2c17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -18
app.py CHANGED
@@ -1,25 +1,30 @@
 
1
  from fastapi import FastAPI
2
  from fastapi.middleware.cors import CORSMiddleware
 
3
  from pydantic import BaseModel
4
- import torch
5
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
 
6
 
7
- # =========================
8
- # AÇIK MODEL (GATED DEĞİL)
9
- # =========================
10
  MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
11
 
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
13
  model = AutoModelForCausalLM.from_pretrained(
14
  MODEL_ID,
15
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
16
  device_map="auto"
17
  )
18
 
19
- # =========================
20
  # FASTAPI
21
- # =========================
22
- app = FastAPI()
 
23
 
24
  app.add_middleware(
25
  CORSMiddleware,
@@ -28,33 +33,59 @@ app.add_middleware(
28
  allow_headers=["*"],
29
  )
30
 
31
- class ChatReq(BaseModel):
 
 
 
 
32
  message: str
33
 
 
 
 
 
34
  @app.get("/")
35
  def root():
36
  return {
37
  "status": "ok",
38
- "service": "ZenkaMind API",
39
  "model": MODEL_ID
40
  }
41
 
42
  @app.post("/api/chat")
43
- def chat(req: ChatReq):
44
- prompt = f"<|user|>\n{req.message}\n<|assistant|>\n"
 
 
 
 
 
 
 
45
 
46
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
47
 
48
  with torch.no_grad():
49
- output = model.generate(
50
  **inputs,
51
- max_new_tokens=256,
52
  do_sample=True,
53
  temperature=0.7,
54
  top_p=0.9
55
  )
56
 
57
- text = tokenizer.decode(output[0], skip_special_tokens=True)
58
- answer = text.split("<|assistant|>")[-1].strip()
 
 
 
 
 
 
 
 
 
59
 
60
- return {"response": answer}
 
 
 
1
+ import os
2
  from fastapi import FastAPI
3
  from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.responses import JSONResponse
5
  from pydantic import BaseModel
6
+
7
  from transformers import AutoTokenizer, AutoModelForCausalLM
8
+ import torch
9
+
10
+ # ===============================
11
+ # MODEL AYARLARI (GATED DEĞİL)
12
+ # ===============================
13
 
 
 
 
14
  MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
15
 
16
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
17
  model = AutoModelForCausalLM.from_pretrained(
18
  MODEL_ID,
19
+ torch_dtype=torch.float32,
20
  device_map="auto"
21
  )
22
 
23
+ # ===============================
24
  # FASTAPI
25
+ # ===============================
26
+
27
+ app = FastAPI(title="ZenkaMind API Test")
28
 
29
  app.add_middleware(
30
  CORSMiddleware,
 
33
  allow_headers=["*"],
34
  )
35
 
36
+ # ===============================
37
+ # MODELLER
38
+ # ===============================
39
+
40
+ class ChatRequest(BaseModel):
41
  message: str
42
 
43
+ # ===============================
44
+ # ENDPOINTLER
45
+ # ===============================
46
+
47
  @app.get("/")
48
  def root():
49
  return {
50
  "status": "ok",
51
+ "service": "ZenkaMind API Test",
52
  "model": MODEL_ID
53
  }
54
 
55
  @app.post("/api/chat")
56
+ def chat(body: ChatRequest):
57
+ user_input = body.message.strip()
58
+ if not user_input:
59
+ return JSONResponse({"response": "Mesaj boş olamaz."})
60
+
61
+ prompt = f"""Sen ZenkaMind isimli Türkçe konuşan bir yapay zeka asistanısın.
62
+
63
+ Kullanıcı: {user_input}
64
+ ZenkaMind:"""
65
 
66
+ inputs = tokenizer(prompt, return_tensors="pt")
67
 
68
  with torch.no_grad():
69
+ outputs = model.generate(
70
  **inputs,
71
+ max_new_tokens=200,
72
  do_sample=True,
73
  temperature=0.7,
74
  top_p=0.9
75
  )
76
 
77
+ text = tokenizer.decode(outputs[0], skip_special_tokens=True)
78
+
79
+ # Sadece cevabı ayıkla
80
+ if "ZenkaMind:" in text:
81
+ text = text.split("ZenkaMind:")[-1].strip()
82
+
83
+ return JSONResponse({"response": text})
84
+
85
+ # ===============================
86
+ # SERVER (ÇOK KRİTİK)
87
+ # ===============================
88
 
89
+ if __name__ == "__main__":
90
+ import uvicorn
91
+ uvicorn.run(app, host="0.0.0.0", port=7860)