qyle committed on
Commit
e43b823
·
verified ·
1 Parent(s): 8fadf17

deployment

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ rag_data/FAISS_ALLEN_20260129/index.faiss filter=lfs diff=lfs merge=lfs -text
champ/agent.py CHANGED
@@ -56,7 +56,7 @@ def build_champ_agent(vector_store: LCFAISS, repo_id: str = "openai/gpt-oss-20b"
56
  hf_llm = HuggingFaceEndpoint(
57
  repo_id=repo_id,
58
  task="text-generation",
59
- max_new_tokens=500,
60
  temperature=0.2,
61
  top_p = 0.9,
62
  # huggingfacehub_api_token=... (optional; see service.py)
 
56
  hf_llm = HuggingFaceEndpoint(
57
  repo_id=repo_id,
58
  task="text-generation",
59
+ max_new_tokens=1024,
60
  temperature=0.2,
61
  top_p = 0.9,
62
  # huggingfacehub_api_token=... (optional; see service.py)
champ/rag.py CHANGED
@@ -9,34 +9,58 @@ from langchain_community.vectorstores import FAISS as LCFAISS
9
  from langchain_huggingface import HuggingFaceEmbeddings
10
 
11
 
12
- def build_vector_store(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  base_dir: Path,
14
  hf_token: str,
15
- rag_relpath: str = "rag_data/netg_baaibge_chunks_v1.pkl",
16
  embedding_model: str = "BAAI/bge-large-en-v1.5",
17
  device: str = "cpu",
18
  ) -> LCFAISS:
19
  rag_path = base_dir / rag_relpath
20
- with open(rag_path, "rb") as f:
21
- loaded_documents = pickle.load(f)
22
 
23
  model_embedding_kwargs = {"device": device, "use_auth_token": hf_token}
24
  encode_kwargs = {"normalize_embeddings": True}
25
-
26
  embeddings = HuggingFaceEmbeddings(
27
  model_name=embedding_model,
28
  model_kwargs=model_embedding_kwargs,
29
  encode_kwargs=encode_kwargs,
30
  )
31
 
32
- embedding_dim = len(embeddings.embed_query("hello world"))
33
- index = faiss.IndexFlatL2(embedding_dim)
34
-
35
- vector_store = LCFAISS(
36
- embedding_function=embeddings,
37
- index=index,
38
- docstore=InMemoryDocstore(),
39
- index_to_docstore_id={},
40
- )
41
- vector_store.add_documents(documents=loaded_documents)
42
- return vector_store
 
9
  from langchain_huggingface import HuggingFaceEmbeddings
10
 
11
 
12
+ # def build_vector_store(
13
+ # base_dir: Path,
14
+ # hf_token: str,
15
+ # rag_relpath: str = "rag_data/ALLEN_20260129_mdheader_recursivecharsplitter_chunks_v1.pkl",
16
+ # embedding_model: str = "BAAI/bge-large-en-v1.5",
17
+ # device: str = "cpu",
18
+ # ) -> LCFAISS:
19
+ # rag_path = base_dir / rag_relpath
20
+ # with open(rag_path, "rb") as f:
21
+ # loaded_documents = pickle.load(f)
22
+
23
+ # model_embedding_kwargs = {"device": device, "use_auth_token": hf_token}
24
+ # encode_kwargs = {"normalize_embeddings": True}
25
+
26
+ # embeddings = HuggingFaceEmbeddings(
27
+ # model_name=embedding_model,
28
+ # model_kwargs=model_embedding_kwargs,
29
+ # encode_kwargs=encode_kwargs,
30
+ # )
31
+
32
+ # embedding_dim = len(embeddings.embed_query("hello world"))
33
+ # index = faiss.IndexFlatL2(embedding_dim)
34
+
35
+ # vector_store = LCFAISS(
36
+ # embedding_function=embeddings,
37
+ # index=index,
38
+ # docstore=InMemoryDocstore(),
39
+ # index_to_docstore_id={},
40
+ # )
41
+ # vector_store.add_documents(documents=loaded_documents)
42
+ # return vector_store
43
+
44
+ def load_vector_store(
45
  base_dir: Path,
46
  hf_token: str,
47
+ rag_relpath: str = "rag_data/FAISS_ALLEN_20260129",
48
  embedding_model: str = "BAAI/bge-large-en-v1.5",
49
  device: str = "cpu",
50
  ) -> LCFAISS:
51
  rag_path = base_dir / rag_relpath
 
 
52
 
53
  model_embedding_kwargs = {"device": device, "use_auth_token": hf_token}
54
  encode_kwargs = {"normalize_embeddings": True}
55
+
56
  embeddings = HuggingFaceEmbeddings(
57
  model_name=embedding_model,
58
  model_kwargs=model_embedding_kwargs,
59
  encode_kwargs=encode_kwargs,
60
  )
61
 
62
+ return LCFAISS.load_local(
63
+ str(rag_path),
64
+ embeddings,
65
+ allow_dangerous_deserialization=True, # safe because you built the files
66
+ )
 
 
 
 
 
 
champ/service.py CHANGED
@@ -9,7 +9,7 @@ from langchain_community.vectorstores import FAISS as LCFAISS
9
  from langchain_core.messages import HumanMessage
10
 
11
 
12
- from .rag import build_vector_store
13
  from .agent import build_champ_agent
14
  from .triage import safety_triage
15
 
@@ -23,9 +23,22 @@ class ChampService:
23
  agent = None
24
 
25
  async def init(self):
 
 
 
 
 
 
 
 
 
26
  loop = asyncio.get_running_loop()
27
  self.vector_store = await loop.run_in_executor(
28
- None, build_vector_store, self.base_dir, self.hf_token
 
 
 
 
29
  )
30
  self.agent = build_champ_agent(self.vector_store)
31
 
 
9
  from langchain_core.messages import HumanMessage
10
 
11
 
12
+ from .rag import load_vector_store
13
  from .agent import build_champ_agent
14
  from .triage import safety_triage
15
 
 
23
  agent = None
24
 
25
  async def init(self):
26
+ rag_relpath = "rag_data/FAISS_ALLEN_20260129"
27
+ rag_dir = self.base_dir / rag_relpath
28
+
29
+ if not rag_dir.exists():
30
+ raise RuntimeError(
31
+ f"FAISS index not found at {rag_dir}. "
32
+ "Build it locally and upload it (index.faiss + index.pkl)."
33
+ )
34
+
35
  loop = asyncio.get_running_loop()
36
  self.vector_store = await loop.run_in_executor(
37
+ None,
38
+ load_vector_store,
39
+ self.base_dir,
40
+ self.hf_token,
41
+ rag_relpath,
42
  )
43
  self.agent = build_champ_agent(self.vector_store)
44
 
main.py CHANGED
@@ -4,7 +4,7 @@ from contextlib import asynccontextmanager
4
 
5
  from pathlib import Path
6
 
7
- from typing import List, Literal, Optional
8
  from datetime import datetime, timezone
9
 
10
  from dotenv import load_dotenv
@@ -142,7 +142,7 @@ def _call_hf_client(model_id: str, msgs: list[dict], temperature: float,) -> str
142
  except Exception:
143
  return str(resp)
144
 
145
- def call_llm(req: ChatRequest) -> str:
146
  if req.model_type == "champ":
147
  msgs = convert_messages_langchain(req.messages)
148
  reply, triage_meta = champ.invoke(msgs)
@@ -159,7 +159,8 @@ def call_llm(req: ChatRequest) -> str:
159
 
160
  if req.model_type == "google":
161
  return _call_gemini(model_id, msgs, req.temperature), {}
162
-
 
163
  raise ValueError(f"Unhandled model_type: {req.model_type}")
164
 
165
 
 
4
 
5
  from pathlib import Path
6
 
7
+ from typing import List, Literal, Optional, Tuple, Dict, Any
8
  from datetime import datetime, timezone
9
 
10
  from dotenv import load_dotenv
 
142
  except Exception:
143
  return str(resp)
144
 
145
+ def call_llm(req: ChatRequest) -> Tuple[str, Dict[str, Any]]:
146
  if req.model_type == "champ":
147
  msgs = convert_messages_langchain(req.messages)
148
  reply, triage_meta = champ.invoke(msgs)
 
159
 
160
  if req.model_type == "google":
161
  return _call_gemini(model_id, msgs, req.temperature), {}
162
+
163
+ # If you later add HF models via hf_client, handle here.
164
  raise ValueError(f"Unhandled model_type: {req.model_type}")
165
 
166
 
rag_data/ALLEN_20260129_mdheader_recursivecharsplitter_chunks_v1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77458e42d2c79b7f1fef2a3e1fac8d581777097c9b4e6c8b0dae6e6e7a304fc
3
+ size 2400110
rag_data/FAISS_ALLEN_20260129/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ed51d787f8e3239deb7c4f447febb93891a61c7a4c621441c18833e27cd7dc
3
+ size 11075629
rag_data/FAISS_ALLEN_20260129/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527f871ecfc7c5ff28e1e3711f2b7cfb660c35a4c982e37e6fef9ee5ebebebb3
3
+ size 2543100