mtyrrell commited on
Commit
78efc3f
·
1 Parent(s): 844447b

port of generator

Browse files
Files changed (8) hide show
  1. .gitignore +1 -0
  2. Dockerfile +23 -0
  3. README.md +15 -5
  4. app/main.py +39 -0
  5. app/reranker.py +41 -0
  6. app/utils.py +16 -0
  7. params.cfg +4 -0
  8. requirements.txt +21 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .DS_Store
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the ChatFed re-ranking service (Gradio UI + MCP on 7860).

# -------- base image --------
FROM python:3.11-slim

# Single-threaded tokenization/BLAS keeps the slim container predictable on
# small HF Spaces hardware; unbuffered stdout so logs stream immediately.
ENV PYTHONUNBUFFERED=1 \
    OMP_NUM_THREADS=1 \
    TOKENIZERS_PARALLELISM=false
#GRADIO_MCP_SERVER=True

# -------- install deps --------
# Copy requirements first so the pip layer is cached across source-only edits.
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# -------- copy source --------
COPY app ./app
COPY params.cfg .
# NOTE(review): this bakes any local .env secrets into the image layers, and
# `COPY .env* ./` fails the build when no .env file matches — confirm this is
# intended (HF Spaces normally injects secrets as env vars instead).
COPY .env* ./

# Ports:
# • 7860 → Gradio UI (HF Spaces standard)
EXPOSE 7860

# Run as a module so the package-relative import in app/main.py resolves.
CMD ["python", "-m", "app.main"]
README.md CHANGED
@@ -1,10 +1,20 @@
1
  ---
2
- title: Chatfed Reranker
3
- emoji: 🌍
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: ChatFed Re-Ranker Service
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
+ # ReRanker Module
12
+
13
+ This is a cross-encoder based re-ranking service designed to be deployed as a modular component of a broader RAG system. The service runs on a docker container and exposes a gradio UI on port 7860 as well as an MCP endpoint.
14
+
15
+ ## Configuration
16
+
17
+ 1. The module requires an API key (set as an environment variable) for a model provider to run. Make sure to set the appropriate environment variables:
18
+ - HuggingFace: `HF_TOKEN`
19
+
20
+ 2. Inference provider and model settings are accessible via params.cfg
app/main.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio entry point for the re-ranking service.

Builds a two-textbox UI around ``rerank_context`` and, when executed as a
module (``python -m app.main``), serves it on port 7860 with Gradio's MCP
server enabled so the same function is callable as an MCP tool.
"""
import gradio as gr
from .reranker import rerank_context

# ---------------------------------------------------------------------
# Gradio Interface with MCP support
# ---------------------------------------------------------------------
# NOTE(review): rerank_context is annotated as taking a List[Dict] of
# contexts, but the Textbox below supplies a plain string — confirm the
# function accepts the string form (e.g. JSON) or add a parsing wrapper.
ui = gr.Interface(
    fn=rerank_context,
    inputs=[
        gr.Textbox(
            label="Query",
            lines=2,
            placeholder="Paste user query here",
            info="Enter user query"
        ),
        gr.Textbox(
            label="Retrieved Context",
            lines=8,
            placeholder="Paste retrieved context here",
            info="Provide the context/documents to use for reranking"
        ),
    ],
    outputs=gr.Textbox(
        label="Reranked Context",
        lines=6,
        show_copy_button=True
    ),
    title="RAG Reranking Service UI",
    description="Reranks previously retrieved context. Intended for use in RAG pipelines (i.e. context supplied by semantic retriever service) as an MCP server.",
)

# Launch with MCP server enabled
if __name__ == "__main__":
    # 0.0.0.0 so the server is reachable from outside the container;
    # 7860 matches the port EXPOSEd in the Dockerfile.
    ui.launch(
        server_name="0.0.0.0",
        server_port=7860,
        mcp_server=True,
        show_error=True
    )
app/reranker.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Cross-encoder re-ranking of retrieved context for the RAG pipeline."""
import json
from typing import Any, Dict, List, Optional, Union

from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain.schema import Document

from .utils import getconfig

config = getconfig("params.cfg")

# Ranker settings from params.cfg ([reranker] section).
RANKER_MODEL = config.get("reranker", "MODEL")
RANKER_TOP_K = int(config.get("reranker", "TOP_K"))

# Lazily-initialised singleton: loading the HuggingFace cross-encoder is
# expensive, so do it once per process instead of once per request.
_cross_encoder: Optional[HuggingFaceCrossEncoder] = None


def _get_cross_encoder() -> HuggingFaceCrossEncoder:
    """Return the shared cross-encoder model, creating it on first use."""
    global _cross_encoder
    if _cross_encoder is None:
        _cross_encoder = HuggingFaceCrossEncoder(model_name=RANKER_MODEL)
    return _cross_encoder


def rerank_context(
    query: str,
    contexts: Union[str, List[Dict[str, Any]]],
    top_n: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Re-rank retrieved context against *query* using a cross-encoder.

    Params
    ----------------
    query: the user query to score each context against
    contexts: list of dicts, each with 'page_content' and an optional
        'metadata' key. A JSON string encoding such a list is also
        accepted (this is the form the Gradio Textbox in app/main.py
        supplies).
    top_n: number of results to return; defaults to TOP_K from params.cfg

    Returns the top_n contexts as plain dicts, best match first.
    """
    # Generalisation: the Gradio UI passes the context as raw text, so
    # accept a JSON-encoded list as well as a ready-made list of dicts.
    if isinstance(contexts, str):
        contexts = json.loads(contexts)

    # Nothing to rank — skip model/reranker instantiation entirely.
    if not contexts:
        return []

    # Wrap into LangChain Documents.
    docs = [
        Document(page_content=c["page_content"], metadata=c.get("metadata", {}))
        for c in contexts
    ]

    n = top_n or RANKER_TOP_K
    reranker = CrossEncoderReranker(model=_get_cross_encoder(), top_n=n)

    # Bug fix: CrossEncoderReranker is a BaseDocumentCompressor — its public
    # API is compress_documents(documents, query); it has no .rerank() method,
    # so the original call raised AttributeError on every request.
    reranked = reranker.compress_documents(docs, query)

    # Return as plain dicts.
    return [
        {"page_content": d.page_content, "metadata": d.metadata}
        for d in reranked
    ]
app/utils.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import configparser
import logging

def getconfig(configfile_path: str):
    """
    Read the config file.

    Params
    ----------------
    configfile_path: file path of .cfg file

    Returns the populated ConfigParser, or None (with a warning logged)
    when the file cannot be read or parsed — preserving the original
    best-effort contract.
    """
    config = configparser.ConfigParser()
    try:
        # `with` closes the handle even on parse errors; the original
        # open() without a context manager leaked the file object.
        with open(configfile_path) as f:
            config.read_file(f)
    except (OSError, configparser.Error):
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); include the path for debuggability.
        logging.warning("config file not found or unreadable: %s", configfile_path)
        return None
    return config
params.cfg ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# Re-ranker settings, read by app/reranker.py via getconfig("params.cfg").
[reranker]
# Cross-encoder model id passed to HuggingFaceCrossEncoder(model_name=...).
MODEL = BAAI/bge-reranker-v2-m3
# Default number of contexts returned when the caller does not pass top_n.
TOP_K = 5
4
+
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ gradio>=4.0.0
3
+ gradio[mcp]
4
+ python-dotenv>=1.0.0
5
+
6
+ # LangChain core
+ langchain>=0.1.0  # app/reranker.py imports langchain.retrievers.document_compressors and langchain.schema
+ langchain-core>=0.1.0
+ langchain-community>=0.0.1
9
+
10
+ # Provider-specific LangChain packages (NOTE: likely carried over from the generator service; the re-ranker itself only needs the cross-encoder stack — confirm before trimming)
11
+ langchain-openai>=0.1.0
12
+ langchain-anthropic>=0.1.0
13
+ langchain-cohere>=0.1.0
14
+ langchain-together>=0.1.0
15
+ langchain-huggingface>=0.0.1
16
+
17
+ # Additional dependencies that might be needed
18
+ requests>=2.31.0
19
+ pydantic>=2.0.0
20
+ sentence-transformers>=2.2.2 # Required for cross-encoders
21
+ torch>=2.0.0 # Required for HuggingFace models