cjber committed on
Commit
2cdef68
·
1 Parent(s): 7116b5e

add alpha to hybrid search

Browse files
config/config.toml CHANGED
@@ -7,5 +7,6 @@ chunk_size = 1024
7
  chunk_overlap = 32
8
 
9
  [model]
10
- llm = "gpt-3.5-turbo"
11
  top_k = 100
 
 
7
  chunk_overlap = 32
8
 
9
  [model]
10
+ llm = "gpt-4o-mini"
11
  top_k = 100
12
+ alpha = 0.75
src/common/settings.py CHANGED
@@ -19,6 +19,7 @@ class DataStoreSettings(BaseSettings):
19
  class ModelSettings(BaseSettings):
20
  llm: str = Field(min_length=1)
21
  top_k: int = Field(gt=0, le=100)
 
22
 
23
 
24
  class Settings(BaseSettings):
 
19
  class ModelSettings(BaseSettings):
20
  llm: str = Field(min_length=1)
21
  top_k: int = Field(gt=0, le=100)
22
+ alpha: float = Field(ge=0.0, le=1.0)
23
 
24
 
25
  class Settings(BaseSettings):
src/model/model.py CHANGED
@@ -1,10 +1,8 @@
1
  from typing import TypedDict
2
 
3
  from dotenv import load_dotenv
4
- from langchain.retrievers import (
5
- ContextualCompressionRetriever,
6
- PineconeHybridSearchRetriever,
7
- )
8
 
9
  # from langchain.retrievers.document_compressors import FlashrankRerank
10
  from langchain_core.documents import Document
@@ -60,13 +58,13 @@ def create_retriever():
60
  pc = Pinecone()
61
  index = pc.Index(cfg.datastore.index_name, host=cfg.datastore.host)
62
  embeddings = OpenAIEmbeddings(model=cfg.datastore.embed_model)
63
- retriever = PineconeHybridSearchRetriever(
64
  embeddings=embeddings,
65
  sparse_encoder=bm25_encoder,
66
  index=index,
67
  top_k=cfg.model.top_k,
 
68
  )
69
- return retriever
70
 
71
 
72
  def retrieve(state, retriever):
 
1
  from typing import TypedDict
2
 
3
  from dotenv import load_dotenv
4
+ from langchain.retrievers import ContextualCompressionRetriever
5
+ from langchain_community.retrievers import PineconeHybridSearchRetriever
 
 
6
 
7
  # from langchain.retrievers.document_compressors import FlashrankRerank
8
  from langchain_core.documents import Document
 
58
  pc = Pinecone()
59
  index = pc.Index(cfg.datastore.index_name, host=cfg.datastore.host)
60
  embeddings = OpenAIEmbeddings(model=cfg.datastore.embed_model)
61
+ return PineconeHybridSearchRetriever(
62
  embeddings=embeddings,
63
  sparse_encoder=bm25_encoder,
64
  index=index,
65
  top_k=cfg.model.top_k,
66
+ alpha=cfg.model.alpha,
67
  )
 
68
 
69
 
70
  def retrieve(state, retriever):