import hmac
import os
from typing import List, Optional

import uvicorn
from fastapi import FastAPI, HTTPException, Depends, Header
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import BaseModel


# Application object; the route decorators further down register their
# handlers on it, and it is what uvicorn serves when run as a script.
app = FastAPI()


# Hugging Face repository and file name of the GGUF model this service loads.
MODEL_REPO = "mradermacher/Qwen3-Reranker-0.6B-GGUF"
MODEL_FILE = "Qwen3-Reranker-0.6B.Q5_K_M.gguf"

# SECURITY: this fallback key is checked into the source, so it must be
# treated as public. It is kept only so existing deployments that never set
# API_KEY keep working; set the API_KEY environment variable in any real
# deployment.
_DEFAULT_API_KEY = "1qazxsw2"

# Bearer token clients must present; read from the API_KEY environment
# variable, falling back to the built-in default above.
MY_API_KEY = os.getenv("API_KEY", _DEFAULT_API_KEY)

if MY_API_KEY == _DEFAULT_API_KEY:
    # Make the insecure configuration visible at startup instead of silently
    # accepting a publicly known key.
    print(
        "WARNING: API_KEY is not set (or equals the built-in default); "
        "the service is protected only by a publicly known key."
    )


# Fetch the model file from the Hugging Face Hub at import time. HF_TOKEN is
# optional: when the variable is unset, None is passed as the token.
print("Downloading model from Hugging Face Hub...")
hf_token = os.getenv("HF_TOKEN")
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    token=hf_token,
)

# Load the downloaded file into a single module-level llama.cpp instance that
# the request handlers call.
print("Loading model via llama.cpp...")
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2, verbose=False)


class RerankRequest(BaseModel):
    """Request body of the ``POST /v1/rerank`` endpoint."""

    # Query text that every document is scored against.
    query: str
    # Candidate documents; each one is scored independently.
    documents: List[str]
    # Optional cap on the number of results returned; None returns all.
    top_n: Optional[int] = None


class ModelList(BaseModel):
    """Schema for a model listing: an ``object`` tag plus a ``data`` list.

    NOTE(review): no code in the visible part of this file references this
    class; confirm whether it is still needed.
    """

    # Type tag of the payload; defaults to "list".
    object: str = "list"
    # Entries of the listing (element type is not constrained).
    data: list


async def verify_api_key(authorization: Optional[str] = Header(None)):
    """Reject the request unless it carries the expected bearer token.

    Intended for use as a FastAPI dependency. The header value must equal
    ``Bearer <MY_API_KEY>`` exactly.

    Args:
        authorization: Value of the ``Authorization`` request header, or
            ``None`` when the client did not send one.

    Raises:
        HTTPException: 401 when the header is missing, empty, or does not
            match the expected value.
    """
    expected = f"Bearer {MY_API_KEY}".encode("utf-8")
    # A missing header becomes an empty byte string, which can never equal
    # the expected value because that always starts with "Bearer ".
    supplied = (authorization or "").encode("utf-8")

    # Compare as bytes with hmac.compare_digest: it only accepts str when
    # both sides are ASCII, and unlike "!=" its running time does not reveal
    # how many leading characters of the key were guessed correctly.
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(
            status_code=401,
            detail="Unauthorized: Invalid API Key",
            # Tell clients which authentication scheme is expected.
            headers={"WWW-Authenticate": "Bearer"},
        )


@app.get("/v1/models")
async def list_models():
    """Return a static listing containing the single model entry.

    The payload is hard-coded; nothing is read from the loaded model.
    """
    model_entry = {
        "id": "qwen3-reranker-0.6b",
        "object": "model",
        "created": 1700000000,
        "owned_by": "huggingface",
    }
    return {"object": "list", "data": [model_entry]}


def _parse_score(completion) -> float:
    """Turn a llama.cpp completion result into a numeric relevance score.

    Returns 0.0 when the result does not have a ``choices[0]["text"]`` string
    or when the generated text is not a plain run of digits.
    """
    try:
        text_output = completion["choices"][0]["text"].strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        # Unexpected response shape: treat the document as unscored.
        return 0.0

    if not text_output.isdigit():
        return 0.0
    try:
        return float(text_output)
    except ValueError:
        # str.isdigit() accepts characters (e.g. superscript digits) that
        # float() cannot parse.
        return 0.0


@app.post("/v1/rerank", dependencies=[Depends(verify_api_key)])
async def rerank(request: RerankRequest):
    """Score each document against the query and return them best-first.

    Every document is scored with one model call. Results are sorted by
    descending ``relevance_score``; the sort is stable, so documents with
    equal scores keep their request order.

    Args:
        request: Query, documents and optional ``top_n`` cap.

    Returns:
        ``{"results": [...]}`` where each entry holds ``index`` (position in
        ``request.documents``), ``document`` and ``relevance_score``.

    Raises:
        HTTPException: 422 when ``top_n`` is negative.
    """
    top_n = request.top_n
    # A negative top_n would make the slice below drop results from the end
    # instead of limiting the count, so reject it before doing any model work.
    if top_n is not None and top_n < 0:
        raise HTTPException(
            status_code=422,
            detail="top_n must be a non-negative integer",
        )

    query = request.query
    results = []
    for idx, doc in enumerate(request.documents):
        # NOTE(review): the Qwen3-Reranker model card describes a yes/no
        # judgement prompt; confirm that this free-form 0-100 prompt yields
        # meaningful scores with this model.
        prompt = f"Query: {query}\nDocument: {doc}\nScore the relevance from 0 to 100:"

        # NOTE(review): this call is synchronous inside an async handler, so
        # it presumably blocks the event loop while the model runs - confirm.
        # NOTE(review): with max_tokens=2 a three-digit answer such as 100 may
        # be truncated, depending on how the model tokenizes digits - confirm.
        response = llm(
            prompt,
            max_tokens=2,
            stop=["\n"],
            echo=False,
        )

        results.append(
            {
                "index": idx,
                "document": doc,
                "relevance_score": _parse_score(response),
            }
        )

    results.sort(key=lambda item: item["relevance_score"], reverse=True)

    if top_n is not None:
        results = results[:top_n]

    return {"results": results}


@app.get("/")
def read_root():
    """Report a fixed "running" status together with the model file name."""
    status_payload = {"status": "running", "model": MODEL_FILE}
    return status_payload


if __name__ == "__main__":
    # When executed as a script, serve the app on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )