Spaces:
Sleeping
Sleeping
import os

import gradio as gr
import requests
import torch
from sentence_transformers import CrossEncoder
# -------------------------------
# CONFIG
# -------------------------------
# Hugging Face cross-encoder checkpoint used for local scoring.
HF_MODEL = "cross-encoder/ms-marco-MiniLM-L-12-v2"

# Model name sent to the hosted Jina rerank endpoint.
JINA_MODEL = "jina-reranker-m0"

# The API key is a secret and must never be committed to source control.
# It is read from the environment (on Hugging Face Spaces, define it as a
# Space secret named JINA_API_KEY). When it is unset the value is an empty
# string, so the Jina request is rejected by the server instead of the app
# failing at import time.
# NOTE(review): a key was previously hard-coded on this line; treat it as
# leaked and revoke/rotate it.
JINA_API_KEY = os.environ.get("JINA_API_KEY", "")

JINA_ENDPOINT = "https://api.jina.ai/v1/rerank"
# -------------------------------
# Local reranker: Hugging Face CrossEncoder
# -------------------------------
# Built once at import time from the HF_MODEL checkpoint name; rerank()
# reads this module-level instance on every call.
hf_model = CrossEncoder(HF_MODEL)
def rerank(query, docs_text):
    """Rank candidate documents against ``query`` with two rerankers.

    Every non-blank line of ``docs_text`` is scored by the local
    cross-encoder (``hf_model``) and by the Jina rerank HTTP API. Both
    rankings are rendered, highest score first, into one text report.

    Args:
        query: The search query the documents are ranked against.
        docs_text: Candidate documents, one per line. Surrounding
            whitespace is stripped and blank lines are ignored. ``None``
            is treated like an empty string.

    Returns:
        A string with a "Hugging Face Ranking" section followed by a
        "Jina Reranker Ranking" section, or a warning message when no
        documents were supplied. If anything goes wrong while calling or
        parsing the Jina API, that section holds a single error line
        instead of a ranking.
    """
    # One candidate per non-blank line.
    docs = [line.strip() for line in (docs_text or "").split("\n") if line.strip()]
    if not docs:
        return "⚠️ No documents provided."

    # -------------------------------
    # Hugging Face CrossEncoder scores
    # -------------------------------
    # The raw predictions are squashed through a sigmoid in one call so
    # they are displayed on a 0..1 scale.
    raw_scores = hf_model.predict([(query, doc) for doc in docs])
    hf_scores = torch.sigmoid(torch.tensor(raw_scores)).tolist()
    hf_ranking = sorted(zip(docs, hf_scores), key=lambda pair: pair[1], reverse=True)

    # -------------------------------
    # Jina Reranker API scores
    # -------------------------------
    headers = {
        "Authorization": f"Bearer {JINA_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": JINA_MODEL,
        "query": query,
        "documents": docs,
    }
    try:
        response = requests.post(
            JINA_ENDPOINT, headers=headers, json=payload, timeout=20
        )
        response.raise_for_status()
        results = response.json()["results"]
        # The API returns results ordered by relevance, not in request
        # order; each entry's "index" points back into `docs`. Pairing by
        # position would attach scores to the wrong documents. Fall back
        # to the position only if an entry carries no index.
        jina_ranking = sorted(
            (
                (docs[entry.get("index", position)], entry["relevance_score"])
                for position, entry in enumerate(results)
            ),
            key=lambda pair: pair[1],
            reverse=True,
        )
    except Exception as exc:
        # Deliberately broad: this is the UI boundary, and a failing remote
        # call must not hide the local ranking. The message occupies the
        # score slot so it is rendered by the same line template below.
        jina_ranking = [("Error", str(exc))]

    # -------------------------------
    # Format output
    # -------------------------------
    hf_lines = "".join(f"- ({score:.4f}) {doc}\n" for doc, score in hf_ranking)
    jina_lines = "".join(f"- ({score}) {doc}\n" for doc, score in jina_ranking)
    return (
        "### Hugging Face Ranking\n"
        + hf_lines
        + "\n### Jina Reranker Ranking\n"
        + jina_lines
    )
# -------------------------------
# Gradio front end
# -------------------------------
with gr.Blocks() as demo:
    gr.Markdown("### π Query + Multiple Docs Reranking (HF vs Jina)")

    # Inputs: a short query box and a larger box holding one candidate
    # document per line.
    query = gr.Textbox(
        lines=2,
        label="Query",
        placeholder="Enter your query here...",
    )
    docs = gr.Textbox(
        placeholder="Paste multiple document chunks here, each on a new line...",
        label="Candidate Documents (one per line)",
        lines=10,
    )

    # Output: the text report returned by rerank().
    out = gr.Textbox(lines=15, label="Ranked Results")

    # Clicking the button feeds both textboxes to rerank() and writes the
    # returned string into the results box.
    btn = gr.Button("Rerank π")
    btn.click(fn=rerank, inputs=[query, docs], outputs=out)

demo.launch()