- .gitattributes +1 -0
- champ/agent.py +1 -1
- champ/rag.py +40 -16
- champ/service.py +15 -2
- main.py +4 -3
- rag_data/ALLEN_20260129_mdheader_recursivecharsplitter_chunks_v1.pkl +3 -0
- rag_data/FAISS_ALLEN_20260129/index.faiss +3 -0
- rag_data/FAISS_ALLEN_20260129/index.pkl +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+rag_data/FAISS_ALLEN_20260129/index.faiss filter=lfs diff=lfs merge=lfs -text
champ/agent.py
CHANGED
@@ -56,7 +56,7 @@ def build_champ_agent(vector_store: LCFAISS, repo_id: str = "openai/gpt-oss-20b"
     hf_llm = HuggingFaceEndpoint(
         repo_id=repo_id,
         task="text-generation",
-        max_new_tokens=
+        max_new_tokens=1024,
         temperature=0.2,
         top_p = 0.9,
         # huggingfacehub_api_token=... (optional; see service.py)
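
A minimal sketch of exercising the reconfigured endpoint on its own; the prompt is illustrative, and authentication is assumed to come from the environment (HF_TOKEN), since no token is passed explicitly here:

# Sketch (assumed usage, not part of this commit): build the endpoint the way
# champ/agent.py now does and invoke it once.
from langchain_huggingface import HuggingFaceEndpoint

hf_llm = HuggingFaceEndpoint(
    repo_id="openai/gpt-oss-20b",
    task="text-generation",
    max_new_tokens=1024,  # the value this commit sets
    temperature=0.2,
    top_p=0.9,
)

print(hf_llm.invoke("Summarize FAISS in one sentence."))  # returns generated text
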
champ/rag.py
CHANGED
@@ -9,34 +9,58 @@ from langchain_community.vectorstores import FAISS as LCFAISS
 from langchain_huggingface import HuggingFaceEmbeddings
 
 
-def build_vector_store(
-    base_dir: Path,
-    hf_token: str,
-    rag_relpath: str = "rag_data/ALLEN_20260129_mdheader_recursivecharsplitter_chunks_v1.pkl",
-    embedding_model: str = "BAAI/bge-large-en-v1.5",
-    device: str = "cpu",
-) -> LCFAISS:
-    rag_path = base_dir / rag_relpath
-    with open(rag_path, "rb") as f:
-        loaded_documents = pickle.load(f)
-
-    model_embedding_kwargs = {"device": device, "use_auth_token": hf_token}
-    encode_kwargs = {"normalize_embeddings": True}
-
-    embeddings = HuggingFaceEmbeddings(
-        model_name=embedding_model,
-        model_kwargs=model_embedding_kwargs,
-        encode_kwargs=encode_kwargs,
-    )
-
-    embedding_dim = len(embeddings.embed_query("hello world"))
-    index = faiss.IndexFlatL2(embedding_dim)
-
-    vector_store = LCFAISS(
-        embedding_function=embeddings,
-        index=index,
-        docstore=InMemoryDocstore(),
-        index_to_docstore_id={},
-    )
-    vector_store.add_documents(documents=loaded_documents)
-    return vector_store
+# def build_vector_store(
+#     base_dir: Path,
+#     hf_token: str,
+#     rag_relpath: str = "rag_data/ALLEN_20260129_mdheader_recursivecharsplitter_chunks_v1.pkl",
+#     embedding_model: str = "BAAI/bge-large-en-v1.5",
+#     device: str = "cpu",
+# ) -> LCFAISS:
+#     rag_path = base_dir / rag_relpath
+#     with open(rag_path, "rb") as f:
+#         loaded_documents = pickle.load(f)
+
+#     model_embedding_kwargs = {"device": device, "use_auth_token": hf_token}
+#     encode_kwargs = {"normalize_embeddings": True}
+
+#     embeddings = HuggingFaceEmbeddings(
+#         model_name=embedding_model,
+#         model_kwargs=model_embedding_kwargs,
+#         encode_kwargs=encode_kwargs,
+#     )
+
+#     embedding_dim = len(embeddings.embed_query("hello world"))
+#     index = faiss.IndexFlatL2(embedding_dim)
+
+#     vector_store = LCFAISS(
+#         embedding_function=embeddings,
+#         index=index,
+#         docstore=InMemoryDocstore(),
+#         index_to_docstore_id={},
+#     )
+#     vector_store.add_documents(documents=loaded_documents)
+#     return vector_store
+
+
+def load_vector_store(
+    base_dir: Path,
+    hf_token: str,
+    rag_relpath: str = "rag_data/FAISS_ALLEN_20260129",
+    embedding_model: str = "BAAI/bge-large-en-v1.5",
+    device: str = "cpu",
+) -> LCFAISS:
+    rag_path = base_dir / rag_relpath
+
+    model_embedding_kwargs = {"device": device, "use_auth_token": hf_token}
+    encode_kwargs = {"normalize_embeddings": True}
+
+    embeddings = HuggingFaceEmbeddings(
+        model_name=embedding_model,
+        model_kwargs=model_embedding_kwargs,
+        encode_kwargs=encode_kwargs,
+    )
+
+    return LCFAISS.load_local(
+        str(rag_path),
+        embeddings,
+        allow_dangerous_deserialization=True,  # safe because you built the files
+    )
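
The net change: the pickle-and-rebuild path (build_vector_store) is retired in favor of loading a prebuilt FAISS index from disk, with allow_dangerous_deserialization=True justified because the index files are self-built. A short usage sketch of the new loader; the base path and query string are illustrative assumptions:

# Sketch (assumptions: script runs at the repo root and HF_TOKEN is exported).
import os
from pathlib import Path

from champ.rag import load_vector_store

vector_store = load_vector_store(Path("."), os.environ["HF_TOKEN"])

# LangChain's FAISS wrapper exposes similarity search over the loaded index.
for doc in vector_store.similarity_search("what does the agent retrieve?", k=3):
    print(doc.page_content[:80])
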
champ/service.py
CHANGED
@@ -9,7 +9,7 @@ from langchain_community.vectorstores import FAISS as LCFAISS
 from langchain_core.messages import HumanMessage
 
 
-from .rag import build_vector_store
+from .rag import load_vector_store
 from .agent import build_champ_agent
 from .triage import safety_triage
 
@@ -23,9 +23,22 @@ class ChampService:
     agent = None
 
     async def init(self):
+        rag_relpath = "rag_data/FAISS_ALLEN_20260129"
+        rag_dir = self.base_dir / rag_relpath
+
+        if not rag_dir.exists():
+            raise RuntimeError(
+                f"FAISS index not found at {rag_dir}. "
+                "Build it locally and upload it (index.faiss + index.pkl)."
+            )
+
         loop = asyncio.get_running_loop()
         self.vector_store = await loop.run_in_executor(
-            None, build_vector_store, self.base_dir, self.hf_token
+            None,
+            load_vector_store,
+            self.base_dir,
+            self.hf_token,
+            rag_relpath,
         )
         self.agent = build_champ_agent(self.vector_store)
 
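
init now fails fast with a RuntimeError when the prebuilt index directory is missing, instead of attempting a rebuild. A hedged startup sketch; ChampService's constructor is not shown in this diff, so the keyword arguments below are assumptions:

# Sketch only: the constructor signature is assumed, not confirmed by the diff.
import asyncio
import os
from pathlib import Path

from champ.service import ChampService

async def main() -> None:
    svc = ChampService(base_dir=Path("."), hf_token=os.environ["HF_TOKEN"])  # assumed args
    await svc.init()  # raises RuntimeError if rag_data/FAISS_ALLEN_20260129 is absent

asyncio.run(main())
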
main.py
CHANGED
@@ -4,7 +4,7 @@ from contextlib import asynccontextmanager
 
 from pathlib import Path
 
-from typing import List, Literal, Optional
+from typing import List, Literal, Optional, Tuple, Dict, Any
 from datetime import datetime, timezone
 
 from dotenv import load_dotenv
@@ -142,7 +142,7 @@ def _call_hf_client(model_id: str, msgs: list[dict], temperature: float,) -> str
     except Exception:
         return str(resp)
 
-def call_llm(req: ChatRequest) -> str:
+def call_llm(req: ChatRequest) -> Tuple[str, Dict[str, Any]]:
     if req.model_type == "champ":
         msgs = convert_messages_langchain(req.messages)
         reply, triage_meta = champ.invoke(msgs)
@@ -159,7 +159,8 @@ def call_llm(req: ChatRequest) -> str:
 
     if req.model_type == "google":
         return _call_gemini(model_id, msgs, req.temperature), {}
-
+
+    # If you later add HF models via hf_client, handle here.
     raise ValueError(f"Unhandled model_type: {req.model_type}")
 
 
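
With the corrected annotation, every branch of call_llm returns a (reply, metadata) pair: {} for plain models, triage metadata for champ. Callers therefore always unpack two values. A sketch; the ChatRequest fields are assumed from the branches shown above:

# Sketch (assumed ChatRequest shape, inferred from the branches in call_llm).
req = ChatRequest(
    model_type="google",
    messages=[{"role": "user", "content": "Hello"}],  # assumed message schema
    temperature=0.2,
)
reply, meta = call_llm(req)  # meta is {} for non-champ model types
print(reply)
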
rag_data/ALLEN_20260129_mdheader_recursivecharsplitter_chunks_v1.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f77458e42d2c79b7f1fef2a3e1fac8d581777097c9b4e6c8b0dae6e6e7a304fc
+size 2400110
rag_data/FAISS_ALLEN_20260129/index.faiss
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14ed51d787f8e3239deb7c4f447febb93891a61c7a4c621441c18833e27cd7dc
+size 11075629
rag_data/FAISS_ALLEN_20260129/index.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:527f871ecfc7c5ff28e1e3711f2b7cfb660c35a4c982e37e6fef9ee5ebebebb3
+size 2543100