Adhin committed on
Commit
a852a21
·
1 Parent(s): 2f45137

reorder program

Browse files
Files changed (1) hide show
  1. app.py +16 -15
app.py CHANGED
@@ -29,13 +29,6 @@ ZIP_FILE = "chroma_db.zip"
29
  reranker_model = None
30
  embedding = None
31
 
32
- if not os.path.exists(CHROMA_PATH):
33
- print("Downloading ChromaDB from Google Drive...")
34
- subprocess.run(["gdown", f"https://drive.google.com/uc?id={GOOGLE_DRIVE_FILE_ID}", "-O", ZIP_FILE])
35
- subprocess.run(["unzip", ZIP_FILE]) # Extract database
36
- retriever = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding).as_retriever(search_kwargs={"k": 20})
37
- print("ChromaDB loaded!")
38
-
39
  def load_embedding_model(model_path : str):
40
  start_time = time.time()
41
  encode_kwargs = {"normalize_embeddings": True}
@@ -48,6 +41,21 @@ def load_embedding_model(model_path : str):
48
  print(f'model load time {round(end_time - start_time, 0)} second')
49
  return local_embedding
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def rag_with_reranking(query : str):
52
  compressor = CrossEncoderReranker(model=reranker_model, top_n=3)
53
  compression_retriever = ContextualCompressionRetriever(
@@ -59,15 +67,8 @@ def rag_with_reranking(query : str):
59
  @app.get("/search")
60
  def search_text(query):
61
  """Searches for similar texts."""
62
- global reranker_model, embedding
63
-
64
  print(f"Searching for: {query}")
65
- if not reranker_model:
66
- reranker_model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-v2-m3")
67
- print("reranker model loaded")
68
- if not embedding:
69
- embedding = load_embedding_model(model_path="intfloat/multilingual-e5-large")
70
- print("embedding model loaded")
71
 
72
  results = rag_with_reranking(query)
73
 
 
29
  reranker_model = None
30
  embedding = None
31
 
 
 
 
 
 
 
 
32
  def load_embedding_model(model_path : str):
33
  start_time = time.time()
34
  encode_kwargs = {"normalize_embeddings": True}
 
41
  print(f'model load time {round(end_time - start_time, 0)} second')
42
  return local_embedding
43
 
44
+ if not reranker_model:
45
+ reranker_model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-v2-m3")
46
+ print("reranker model loaded")
47
+ if not embedding:
48
+ embedding = load_embedding_model(model_path="intfloat/multilingual-e5-large")
49
+ print("embedding model loaded")
50
+
51
+ if not os.path.exists(CHROMA_PATH):
52
+ print("Downloading ChromaDB from Google Drive...")
53
+ subprocess.run(["gdown", f"https://drive.google.com/uc?id={GOOGLE_DRIVE_FILE_ID}", "-O", ZIP_FILE])
54
+ subprocess.run(["unzip", ZIP_FILE]) # Extract database
55
+ retriever = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding).as_retriever(search_kwargs={"k": 20})
56
+ print("ChromaDB loaded!")
57
+
58
+
59
  def rag_with_reranking(query : str):
60
  compressor = CrossEncoderReranker(model=reranker_model, top_n=3)
61
  compression_retriever = ContextualCompressionRetriever(
 
67
  @app.get("/search")
68
  def search_text(query):
69
  """Searches for similar texts."""
70
+
 
71
  print(f"Searching for: {query}")
 
 
 
 
 
 
72
 
73
  results = rag_with_reranking(query)
74