Spaces:
Sleeping
Sleeping
File size: 2,730 Bytes
3bf3346 4ffa42b 7ec684a ec4ed0d 7ec684a ec4ed0d 7ec684a ec4ed0d 7ec684a ec4ed0d 7ec684a ec4ed0d 7ec684a ec4ed0d 7ec684a ec4ed0d 7ec684a ec4ed0d 063bf3b ec4ed0d 33e4eda 7ff30bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import os

import gradio as gr
import requests
import torch
from sentence_transformers import CrossEncoder
# -------------------------------
# CONFIG
# -------------------------------
# Checkpoint name handed to sentence-transformers' CrossEncoder.
HF_MODEL = "cross-encoder/ms-marco-MiniLM-L-12-v2"
# Model name sent in the "model" field of the Jina rerank request.
JINA_MODEL = "jina-reranker-m0"
# The Jina API key is a credential and must never be committed to source:
# anything stored in the repository is public and has to be treated as
# compromised. It is read from the JINA_API_KEY environment variable instead
# (for a hosted Space, configure it as a secret). When the variable is unset
# the key is empty and the Jina request is sent without a usable token.
JINA_API_KEY = os.environ.get("JINA_API_KEY", "")
# HTTP endpoint that receives the rerank POST request.
JINA_ENDPOINT = "https://api.jina.ai/v1/rerank"
# -------------------------------
# Load Hugging Face CrossEncoder
# -------------------------------
# Built once at module import so the same model object is shared by every
# call that scores documents, instead of being constructed per request.
# NOTE(review): presumably this fetches the checkpoint on first run, which
# would make start-up slow on a cold cache - confirm.
hf_model = CrossEncoder(HF_MODEL)
def rerank(query, docs_text):
    """Rank candidate documents against a query with two rerankers.

    The documents are scored twice: locally with the module-level
    sentence-transformers cross-encoder (``hf_model``) and remotely through
    the Jina rerank HTTP API. Both rankings are rendered into one
    Markdown-formatted string, best match first.

    Args:
        query: The search query every document is compared against.
        docs_text: Candidate documents, one per line. Blank lines and
            surrounding whitespace are ignored; ``None`` is treated as empty.

    Returns:
        A string with a "Hugging Face Ranking" section followed by a
        "Jina Reranker Ranking" section, or a warning message when no
        documents were supplied. If the Jina request fails, its section
        contains a single error line instead of a ranking, so the local
        ranking is still shown.
    """
    # Guard against a missing value so it behaves like an empty textbox.
    docs = [line.strip() for line in (docs_text or "").split("\n") if line.strip()]
    if not docs:
        return "⚠️ No documents provided."

    # -------------------------------
    # Hugging Face CrossEncoder Scores
    # -------------------------------
    raw_scores = hf_model.predict([(query, doc) for doc in docs])
    # Squash the model outputs into (0, 1) in one vectorised call rather
    # than building a tensor per score.
    hf_scores = torch.sigmoid(torch.as_tensor(raw_scores)).tolist()
    hf_ranking = sorted(zip(docs, hf_scores), key=lambda pair: pair[1], reverse=True)

    # -------------------------------
    # Jina Reranker API Scores
    # -------------------------------
    headers = {
        "Authorization": f"Bearer {JINA_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": JINA_MODEL,
        "query": query,
        "documents": docs,
    }
    jina_ranking = []
    jina_error = None
    try:
        response = requests.post(
            JINA_ENDPOINT, headers=headers, json=payload, timeout=20
        )
        response.raise_for_status()
        results = response.json()["results"]
        # The API returns results ordered by relevance, not in input order;
        # each entry's "index" points back at the submitted document. Pairing
        # scores with documents by position would attach them to the wrong
        # text, so the index is used to look the document up.
        jina_ranking = sorted(
            ((docs[item["index"]], item["relevance_score"]) for item in results),
            key=lambda pair: pair[1],
            reverse=True,
        )
    except (
        requests.RequestException,
        KeyError,
        IndexError,
        TypeError,
        ValueError,
    ) as exc:
        # Best effort: a failing remote call must not hide the local ranking.
        jina_error = str(exc)

    # -------------------------------
    # Format output
    # -------------------------------
    lines = ["### Hugging Face Ranking"]
    lines.extend(f"- ({score:.4f}) {doc}" for doc, score in hf_ranking)
    lines.extend(["", "### Jina Reranker Ranking"])
    if jina_error is not None:
        lines.append(f"- Error: {jina_error}")
    else:
        # Same fixed precision as the Hugging Face section for easy comparison.
        lines.extend(f"- ({score:.4f}) {doc}" for doc, score in jina_ranking)
    return "\n".join(lines) + "\n"
# -------------------------------
# Simple UI
# -------------------------------
# Declarative Gradio layout: components appear on the page in the order they
# are created inside the Blocks context, so the statement order matters.
with gr.Blocks() as demo:
    # Page heading.
    # NOTE(review): the "π" looks like a mis-decoded emoji - restore the
    # intended character from the original file.
    gr.Markdown("### π Query + Multiple Docs Reranking (HF vs Jina)")
    # Free-text query the documents are ranked against.
    query = gr.Textbox(label="Query", lines=2, placeholder="Enter your query here...")
    # Candidate documents; rerank() splits this text on newlines.
    docs = gr.Textbox(
        label="Candidate Documents (one per line)",
        lines=10,
        placeholder="Paste multiple document chunks here, each on a new line..."
    )
    # Output area that receives the string returned by rerank().
    out = gr.Textbox(label="Ranked Results", lines=15)
    # NOTE(review): the "π" in the button label also looks like a mis-decoded
    # emoji - confirm the intended character.
    btn = gr.Button("Rerank π")
    # Clicking the button calls rerank(query, docs) and writes the result to `out`.
    btn.click(rerank, inputs=[query, docs], outputs=out)
# Start the web server for the app.
demo.launch()
|