import os
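# These cache variables must be set before transformers is imported,
# since the library resolves its cache paths at import time.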
os.environ["HF_HOME"] = "/home/appuser/.cache/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/home/appuser/.cache/huggingface"
os.environ["HUGGINGFACE_HUB_CACHE"] = "/home/appuser/.cache/huggingface"
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch
MODEL_NAME = "Qwen/Qwen3-4B-Instruct-2507" # "ynsbyrm/clone-c4ai-command-a-03-2025" # kendi model repo adını burada tut
app = FastAPI()
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
print("🔄 Loading model in 4-bit...")
# The bare load_in_4bit kwarg is deprecated in recent transformers versions;
# pass a BitsAndBytesConfig instead (requires the bitsandbytes package).
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,  # <--- this is where the magic happens
    bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=quant_config,
    trust_remote_code=True
)
print("✅ Model loaded.")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7
)
class GenerateRequest(BaseModel):
    prompt: str
@app.get("/")
def root():
return {"message": "Model Ready 🚀"}
@app.post("/generate")
def generate_text(data: GenerateRequest):
    # Format the request with the model's chat template.
    prompt = tokenizer.apply_chat_template(
        [
            {"role": "system", "content": "You are an assistant who helps the user."},
            {"role": "user", "content": data.prompt}
        ],
        tokenize=False,
        add_generation_prompt=True
    )
    # Generate once through the pipeline; return_full_text=False
    # strips the prompt so only the model's reply is returned.
    result = pipe(prompt, return_full_text=False)[0]["generated_text"]
    return {"response": result}
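# Example usage (a minimal sketch, assuming this file is saved as main.py
# and the server runs on the default port 8000):
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
#   curl -X POST http://localhost:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello!"}'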