import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import logging
from fastapi.responses import StreamingResponse
# --- IMPORT FOR THE NEW (CORS) PERMISSION ---
from fastapi.middleware.cors import CORSMiddleware
# --- LOGGING SETUP ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- MODEL SETTINGS (RAG removed) ---
# Repo and quantized GGUF file pulled from the Hugging Face Hub below.
MODEL_REPO = "Qwen/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-3b-instruct-q2_k.gguf"
# --- STEP 1: DOWNLOAD THE MODEL ---
# Fetch the GGUF file from the Hub at import time; abort startup on failure
# so the server never comes up without a model on disk.
logger.info(f"{MODEL_FILE} modeli Hugging Face Hub'dan indiriliyor...")
try:
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
    logger.info(f"Model başarıyla {model_path} adresine indirildi.")
except Exception as e:
    # Log and re-raise: a missing model is fatal for this service.
    logger.error(f"Model indirilemedi: {e}")
    raise
# --- STEP 2: LOAD THE GGUF MODEL ---
# CPU-only inference (n_gpu_layers=0) with a 4K-token context window.
logger.info("GGUF modeli yükleniyor...")
try:
    llm = Llama(
        model_path=model_path,
        n_ctx=4096,         # context window in tokens
        n_gpu_layers=0,     # run fully on CPU
        verbose=True,
    )
    logger.info("Model başarıyla yüklendi.")
except Exception as e:
    # Log and re-raise: without a loaded model the API cannot serve anything.
    logger.error(f"Model yüklenirken hata oluştu: {e}")
    raise
# --- STEP 3: FastAPI APPLICATION ---
app = FastAPI(
    title="Qwen 2.5 API (Streaming - No RAG)",
    description="Sadece Qwen 2.5 modelini stream eder.",
)

# --- STEP 4: CORS (CROSS-ORIGIN) SETTINGS ---
# FIX: the original list also contained "*". Per the CORS specification
# (and the FastAPI/Starlette CORSMiddleware docs), a wildcard origin must
# not be combined with allow_credentials=True — browsers reject such
# responses, so the wildcard silently broke credentialed cross-origin
# requests. Only explicit origins are listed now; add new frontends here.
origins = [
    "https://deede.tr",
    "http://deede.tr",
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],   # all HTTP methods
    allow_headers=["*"],   # all request headers
)
# --- END OF CORS PERMISSION CODE ---
class ChatRequest(BaseModel):
    """Request body for POST /api/chat: the raw user prompt to answer."""

    prompt: str
# --- STEP 5: PLAIN STREAM GENERATOR (RAG removed) ---
async def stream_generator(messages):
    """Stream the model's reply token by token.

    Args:
        messages: Chat history in OpenAI chat format — a list of
            ``{"role": ..., "content": ...}`` dicts passed straight to
            ``llm.create_chat_completion``.

    Yields:
        str: Each non-empty content fragment produced by the model.
        On failure an error marker string is yielded so the client sees
        the problem in-band instead of a silently truncated stream.
    """
    # NOTE(review): create_chat_completion returns a *synchronous* iterator
    # and inference is CPU-bound, so consuming it here blocks the event
    # loop between tokens. If concurrent requests matter, wrap it with
    # fastapi.concurrency.iterate_in_threadpool — confirm before changing.
    try:
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1024,
            temperature=0.7,
            stream=True,
        )
        for chunk in stream:
            # The first delta carries only {"role": ...} and later deltas
            # may omit "content", so guard with .get() and skip empties.
            content = chunk['choices'][0]['delta'].get('content')
            if content:
                yield content
    except Exception as e:
        # FIX: logger.exception records the full traceback; the original
        # logger.error discarded it, making mid-stream failures undebuggable.
        logger.exception(f"LLM stream hatası: {e}")
        yield f" [LLM STREAM HATASI: {e}]"
@app.get("/")
def read_root():
    """Health check: report that the API is up and which model repo it serves."""
    payload = {
        "status": "Streaming API çalışıyor (No RAG)",
        "model_repo": MODEL_REPO,
    }
    return payload
@app.post("/api/chat")
async def chat_with_rag(request: ChatRequest):
    """Answer a chat prompt as a plain-text token stream.

    Builds a two-message conversation (fixed system persona + the user's
    prompt, no RAG context) and streams the model's reply back.
    """
    user_prompt = request.prompt
    logger.info(f"Gelen prompt: {user_prompt}")

    # --- BUILD THE LLM PROMPT (without RAG context) ---
    system_message = {
        "role": "system",
        "content": "Sen Qwen, Alibaba Cloud tarafından yaratılmış bir yapay zeka asistansısın. Sana sorulan sorulara yardımcı olacak şekilde cevap ver.",
    }
    user_message = {"role": "user", "content": user_prompt}

    # --- START STREAMING ---
    # Plain text body; no source-URL metadata is sent anymore.
    return StreamingResponse(
        stream_generator([system_message, user_message]),
        media_type="text/plain",
    )
# --- STEP 7: RUN THE APPLICATION ---
if __name__ == "__main__":
    # Imported lazily so an external ASGI runner (e.g. `uvicorn module:app`)
    # can serve this module without uvicorn being imported at module load.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)