import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import logging
from fastapi.responses import StreamingResponse
# --- IMPORT FOR THE NEW (CORS) PERMISSION ---
from fastapi.middleware.cors import CORSMiddleware
# --- LOGGING SETUP ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- MODEL SETTINGS (RAG removed) ---
# Repo and quantized GGUF file pulled from the Hugging Face Hub below.
MODEL_REPO = "Qwen/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-3b-instruct-q2_k.gguf"
# --- STEP 1: DOWNLOAD THE MODEL ---
# Fetch the GGUF file from the Hub at import time; abort startup on failure
# so the server never comes up without a model on disk.
logger.info(f"{MODEL_FILE} modeli Hugging Face Hub'dan indiriliyor...")
try:
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
    logger.info(f"Model başarıyla {model_path} adresine indirildi.")
except Exception as e:
    # Log and re-raise: a missing model is fatal for this service.
    logger.error(f"Model indirilemedi: {e}")
    raise
# --- STEP 2: LOAD THE GGUF MODEL ---
# CPU-only inference (n_gpu_layers=0) with a 4K-token context window.
logger.info("GGUF modeli yükleniyor...")
try:
    llm = Llama(
        model_path=model_path,
        n_ctx=4096,         # context window in tokens
        n_gpu_layers=0,     # run fully on CPU
        verbose=True,
    )
    logger.info("Model başarıyla yüklendi.")
except Exception as e:
    # Log and re-raise: without a loaded model the API cannot serve anything.
    logger.error(f"Model yüklenirken hata oluştu: {e}")
    raise
# --- STEP 3: FastAPI APPLICATION ---
app = FastAPI(
    title="Qwen 2.5 API (Streaming - No RAG)",
    description="Sadece Qwen 2.5 modelini stream eder.",
)

# --- STEP 4: CORS (CROSS-ORIGIN) SETTINGS ---
# FIX: the original list also contained "*". Per the CORS specification
# (and the FastAPI/Starlette CORSMiddleware docs), a wildcard origin must
# not be combined with allow_credentials=True — browsers reject such
# responses, so the wildcard silently broke credentialed cross-origin
# requests. Only explicit origins are listed now; add new frontends here.
origins = [
    "https://deede.tr",
    "http://deede.tr",
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],   # all HTTP methods
    allow_headers=["*"],   # all request headers
)
# --- END OF CORS PERMISSION CODE ---
class ChatRequest(BaseModel):
    """Request body for POST /api/chat: the raw user prompt to answer."""

    prompt: str
# --- STEP 5: PLAIN STREAM GENERATOR (RAG removed) ---
async def stream_generator(messages):
    """Stream the model's reply token by token.

    Args:
        messages: Chat history in OpenAI chat format — a list of
            ``{"role": ..., "content": ...}`` dicts passed straight to
            ``llm.create_chat_completion``.

    Yields:
        str: Each non-empty content fragment produced by the model.
        On failure an error marker string is yielded so the client sees
        the problem in-band instead of a silently truncated stream.
    """
    # NOTE(review): create_chat_completion returns a *synchronous* iterator
    # and inference is CPU-bound, so consuming it here blocks the event
    # loop between tokens. If concurrent requests matter, wrap it with
    # fastapi.concurrency.iterate_in_threadpool — confirm before changing.
    try:
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1024,
            temperature=0.7,
            stream=True,
        )
        for chunk in stream:
            # The first delta carries only {"role": ...} and later deltas
            # may omit "content", so guard with .get() and skip empties.
            content = chunk['choices'][0]['delta'].get('content')
            if content:
                yield content
    except Exception as e:
        # FIX: logger.exception records the full traceback; the original
        # logger.error discarded it, making mid-stream failures undebuggable.
        logger.exception(f"LLM stream hatası: {e}")
        yield f" [LLM STREAM HATASI: {e}]"
@app.get("/")
def read_root():
    """Health check: report that the API is up and which model repo it serves."""
    payload = {
        "status": "Streaming API çalışıyor (No RAG)",
        "model_repo": MODEL_REPO,
    }
    return payload
@app.post("/api/chat")
async def chat_with_rag(request: ChatRequest):
    """Answer a chat prompt as a plain-text token stream.

    Builds a two-message conversation (fixed system persona + the user's
    prompt, no RAG context) and streams the model's reply back.
    """
    user_prompt = request.prompt
    logger.info(f"Gelen prompt: {user_prompt}")

    # --- BUILD THE LLM PROMPT (without RAG context) ---
    system_message = {
        "role": "system",
        "content": "Sen Qwen, Alibaba Cloud tarafından yaratılmış bir yapay zeka asistansısın. Sana sorulan sorulara yardımcı olacak şekilde cevap ver.",
    }
    user_message = {"role": "user", "content": user_prompt}

    # --- START STREAMING ---
    # Plain text body; no source-URL metadata is sent anymore.
    return StreamingResponse(
        stream_generator([system_message, user_message]),
        media_type="text/plain",
    )
# --- STEP 7: RUN THE APPLICATION ---
if __name__ == "__main__":
    # Imported lazily so an external ASGI runner (e.g. `uvicorn module:app`)
    # can serve this module without uvicorn being imported at module load.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)