Spaces:

ReallyFloppyPenguin
/

Qwen3-Reranker-0.6B

Running

App Files Files Community

ReallyFloppyPenguin committed on Jun 10, 2025

Commit

8150f4d

verified ·

1 Parent(s): b957fc7

Create app.py

Browse files

Files changed (1) hide show

app.py +386 -0

app.py ADDED Viewed

	@@ -0,0 +1,386 @@

+import gradio as gr
+import torch
+from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
+import logging
+from typing import List, Tuple
+import pandas as pd
+# Configure root logging at INFO level and create this module's logger
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class Qwen3Reranker:
+    def __init__(self, model_name="Qwen/Qwen3-Reranker-0.6B"):
+        self.model_name = model_name
+        self.tokenizer = None
+        self.model = None
+        self.token_false_id = None
+        self.token_true_id = None
+        self.max_length = 8192
+        self.prefix_tokens = None
+        self.suffix_tokens = None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self._load_model()
+    def _load_model(self):
+        """Load the tokenizer and model"""
+        try:
+            logger.info(f"Loading {self.model_name}...")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                padding_side='left'
+            )
+            # Load model with appropriate settings
+            if torch.cuda.is_available():
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_name,
+                    torch_dtype=torch.float16,
+                    device_map="auto"
+                ).eval()
+            else:
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.model_name
+                ).eval()
+            # Set up tokens
+            self.token_false_id = self.tokenizer.convert_tokens_to_ids("no")
+            self.token_true_id = self.tokenizer.convert_tokens_to_ids("yes")
+            # Set up prefix and suffix
+            prefix = "<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be \"yes\" or \"no\".<|im_end|>\n<|im_start|>user\n"
+            suffix = "<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n"
+            self.prefix_tokens = self.tokenizer.encode(prefix, add_special_tokens=False)
+            self.suffix_tokens = self.tokenizer.encode(suffix, add_special_tokens=False)
+            logger.info("Model loaded successfully!")
+        except Exception as e:
+            logger.error(f"Error loading model: {e}")
+            raise e
+    def format_instruction(self, instruction: str, query: str, doc: str) -> str:
+        """Format the instruction for the reranker"""
+        if instruction is None or instruction.strip() == "":
+            instruction = 'Given a web search query, retrieve relevant passages that answer the query'
+        return f"<Instruct>: {instruction}\n<Query>: {query}\n<Document>: {doc}"
+    def process_inputs(self, pairs: List[str]) -> dict:
+        """Process input pairs for the model"""
+        inputs = self.tokenizer(
+            pairs,
+            padding=False,
+            truncation='longest_first',
+            return_attention_mask=False,
+            max_length=self.max_length - len(self.prefix_tokens) - len(self.suffix_tokens)
+        )
+        for i, ele in enumerate(inputs['input_ids']):
+            inputs['input_ids'][i] = self.prefix_tokens + ele + self.suffix_tokens
+        inputs = self.tokenizer.pad(
+            inputs,
+            padding=True,
+            return_tensors="pt",
+            max_length=self.max_length
+        )
+        for key in inputs:
+            inputs[key] = inputs[key].to(self.model.device)
+        return inputs
+    @torch.no_grad()
+    def compute_scores(self, inputs: dict) -> List[float]:
+        """Compute relevance scores"""
+        batch_scores = self.model(**inputs).logits[:, -1, :]
+        true_vector = batch_scores[:, self.token_true_id]
+        false_vector = batch_scores[:, self.token_false_id]
+        batch_scores = torch.stack([false_vector, true_vector], dim=1)
+        batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
+        scores = batch_scores[:, 1].exp().tolist()
+        return scores
+    def rank_documents(self, query: str, documents: List[str], instruction: str = None) -> List[Tuple[str, float]]:
+        """Rank documents by relevance to query"""
+        if not documents or not query.strip():
+            return []
+        # Format inputs
+        pairs = [
+            self.format_instruction(instruction, query, doc)
+            for doc in documents
+        ]
+        # Process and score
+        inputs = self.process_inputs(pairs)
+        scores = self.compute_scores(inputs)
+        # Combine documents with scores and sort
+        doc_scores = list(zip(documents, scores))
+        doc_scores.sort(key=lambda x: x[1], reverse=True)
+        return doc_scores
+# Initialize the reranker
+try:
+    reranker = Qwen3Reranker()
+    model_loaded = True
+except Exception as e:
+    logger.error(f"Failed to initialize reranker: {e}")
+    model_loaded = False
+    reranker = None
+def rerank_documents(query: str, documents_text: str, instruction: str = None) -> tuple:
+    """
+    Rerank documents based on query relevance
+    Args:
+        query: The search query
+        documents_text: Documents separated by newlines or numbered
+        instruction: Custom instruction (optional)
+    Returns:
+        Tuple of (formatted results table, download data)
+    """
+    if not model_loaded:
+        return "❌ Model not loaded. Please check the logs.", None
+    if not query.strip():
+        return "❌ Please enter a query.", None
+    if not documents_text.strip():
+        return "❌ Please enter at least one document.", None
+    try:
+        # Parse documents
+        documents = []
+        lines = documents_text.strip().split('\n')
+        for line in lines:
+            line = line.strip()
+            if not line:
+                continue
+            # Remove numbering if present (e.g., "1. Document text" -> "Document text")
+            if line and line[0].isdigit() and '. ' in line:
+                line = line.split('. ', 1)[1]
+            documents.append(line)
+        if not documents:
+            return "❌ No valid documents found.", None
+        # Rank documents
+        ranked_docs = reranker.rank_documents(query, documents, instruction)
+        # Create results
+        results_data = []
+        for i, (doc, score) in enumerate(ranked_docs, 1):
+            results_data.append({
+                "Rank": i,
+                "Score": f"{score:.4f}",
+                "Document": doc[:200] + "..." if len(doc) > 200 else doc,
+                "Full Document": doc
+            })
+        # Create display table
+        df_display = pd.DataFrame([
+            {"Rank": item["Rank"], "Score": item["Score"], "Document": item["Document"]}
+            for item in results_data
+        ])
+        # Create download data
+        df_download = pd.DataFrame([
+            {"Rank": item["Rank"], "Score": item["Score"], "Document": item["Full Document"]}
+            for item in results_data
+        ])
+        return df_display, df_download
+    except Exception as e:
+        logger.error(f"Error in reranking: {e}")
+        return f"❌ Error during reranking: {str(e)}", None
+def create_gradio_interface():
+    """Create the Gradio interface"""
+    with gr.Blocks(
+        title="Qwen3-Reranker-0.6B",
+        theme=gr.themes.Soft(),
+        css="""
+        .main-header {
+            text-align: center;
+            margin-bottom: 2rem;
+        }
+        .model-info {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 1rem;
+            border-radius: 10px;
+            margin-bottom: 1rem;
+        }
+        .example-box {
+            border: 1px solid #e0e0e0;
+            padding: 1rem;
+            border-radius: 8px;
+            margin: 0.5rem 0;
+        }
+        """
+    ) as demo:
+        gr.HTML("""
+        <div class="main-header">
+            <h1>🔍 Qwen3-Reranker-0.6B</h1>
+            <p>Advanced Text Reranking with Multilingual Support</p>
+        </div>
+        """)
+        with gr.Row():
+            with gr.Column():
+                gr.HTML("""
+                <div class="model-info">
+                    <h3>🚀 Model Information</h3>
+                    <ul>
+                        <li><strong>Model:</strong> Qwen3-Reranker-0.6B</li>
+                        <li><strong>Parameters:</strong> 0.6B</li>
+                        <li><strong>Context Length:</strong> 32K tokens</li>
+                        <li><strong>Languages:</strong> 100+ languages supported</li>
+                        <li><strong>Use Case:</strong> Document ranking and relevance scoring</li>
+                    </ul>
+                </div>
+                """)
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.HTML("<h3>📝 Input</h3>")
+                query_input = gr.Textbox(
+                    label="Search Query",
+                    placeholder="Enter your search query here...",
+                    lines=2,
+                    value="What is the capital of China?"
+                )
+                instruction_input = gr.Textbox(
+                    label="Custom Instruction (Optional)",
+                    placeholder="Leave empty for default instruction...",
+                    lines=2,
+                    value=""
+                )
+                documents_input = gr.Textbox(
+                    label="Documents to Rank",
+                    placeholder="Enter documents, one per line or numbered...",
+                    lines=8,
+                    value="""The capital of China is Beijing.
+China is a country in East Asia with a large population.
+Beijing is located in northern China and serves as the political center.
+Shanghai is the largest city in China by population.
+The Great Wall of China is a famous landmark."""
+                )
+                with gr.Row():
+                    rank_btn = gr.Button("🔍 Rank Documents", variant="primary", size="lg")
+                    clear_btn = gr.Button("🗑️ Clear", variant="secondary")
+            with gr.Column(scale=1):
+                gr.HTML("<h3>📊 Results</h3>")
+                results_display = gr.DataFrame(
+                    label="Ranked Documents",
+                    headers=["Rank", "Score", "Document"],
+                    interactive=False,
+                    height=400
+                )
+                download_data = gr.State()
+                download_btn = gr.DownloadButton(
+                    "💾 Download Results (CSV)",
+                    visible=False
+                )
+        # Examples section
+        gr.HTML("<h3>💡 Examples</h3>")
+        with gr.Row():
+            with gr.Column():
+                gr.HTML("""
+                <div class="example-box">
+                    <h4>Example 1: General Search</h4>
+                    <p><strong>Query:</strong> "Python programming tutorials"</p>
+                    <p><strong>Documents:</strong> Various programming resources</p>
+                </div>
+                """)
+            with gr.Column():
+                gr.HTML("""
+                <div class="example-box">
+                    <h4>Example 2: Scientific Research</h4>
+                    <p><strong>Query:</strong> "Machine learning applications in healthcare"</p>
+                    <p><strong>Documents:</strong> Research papers and articles</p>
+                </div>
+                """)
+        def update_interface(query, documents, instruction):
+            if not model_loaded:
+                return "❌ Model not loaded", None, gr.update(visible=False)
+            results, download_df = rerank_documents(query, documents, instruction)
+            if download_df is not None:
+                return results, download_df, gr.update(visible=True)
+            else:
+                return results, None, gr.update(visible=False)
+        def clear_inputs():
+            return "", "", "", None, None, gr.update(visible=False)
+        def download_csv(download_df):
+            if download_df is not None:
+                return download_df.to_csv(index=False)
+            return None
+        # Event handlers
+        rank_btn.click(
+            fn=update_interface,
+            inputs=[query_input, documents_input, instruction_input],
+            outputs=[results_display, download_data, download_btn]
+        )
+        clear_btn.click(
+            fn=clear_inputs,
+            outputs=[query_input, documents_input, instruction_input, results_display, download_data, download_btn]
+        )
+        download_btn.click(
+            fn=download_csv,
+            inputs=[download_data],
+            outputs=[download_btn]
+        )
+        # Footer
+        gr.HTML("""
+        <div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #e0e0e0;">
+            <p>🤗 <a href="https://huggingface.co/Qwen/Qwen3-Reranker-0.6B" target="_blank">Model on Hugging Face</a> |
+            📖 <a href="https://arxiv.org/abs/2506.05176" target="_blank">Research Paper</a></p>
+            <p><em>Powered by Qwen3-Reranker-0.6B - Advanced multilingual text reranking</em></p>
+        </div>
+        """)
+    return demo
+if __name__ == "__main__":
+    # Create and launch the interface
+    demo = create_gradio_interface()
+    # Launch with appropriate settings
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        debug=True,
+        show_error=True
+    )