Spaces:
Paused
Paused
reorder program
Browse files
app.py
CHANGED
|
@@ -29,13 +29,6 @@ ZIP_FILE = "chroma_db.zip"
|
|
| 29 |
reranker_model = None
|
| 30 |
embedding = None
|
| 31 |
|
| 32 |
-
if not os.path.exists(CHROMA_PATH):
|
| 33 |
-
print("Downloading ChromaDB from Google Drive...")
|
| 34 |
-
subprocess.run(["gdown", f"https://drive.google.com/uc?id={GOOGLE_DRIVE_FILE_ID}", "-O", ZIP_FILE])
|
| 35 |
-
subprocess.run(["unzip", ZIP_FILE]) # Extract database
|
| 36 |
-
retriever = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding).as_retriever(search_kwargs={"k": 20})
|
| 37 |
-
print("ChromaDB loaded!")
|
| 38 |
-
|
| 39 |
def load_embedding_model(model_path : str):
|
| 40 |
start_time = time.time()
|
| 41 |
encode_kwargs = {"normalize_embeddings": True}
|
|
@@ -48,6 +41,21 @@ def load_embedding_model(model_path : str):
|
|
| 48 |
print(f'model load time {round(end_time - start_time, 0)} second')
|
| 49 |
return local_embedding
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def rag_with_reranking(query : str):
|
| 52 |
compressor = CrossEncoderReranker(model=reranker_model, top_n=3)
|
| 53 |
compression_retriever = ContextualCompressionRetriever(
|
|
@@ -59,15 +67,8 @@ def rag_with_reranking(query : str):
|
|
| 59 |
@app.get("/search")
|
| 60 |
def search_text(query):
|
| 61 |
"""Searches for similar texts."""
|
| 62 |
-
|
| 63 |
-
|
| 64 |
print(f"Searching for: {query}")
|
| 65 |
-
if not reranker_model:
|
| 66 |
-
reranker_model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-v2-m3")
|
| 67 |
-
print("reranker model loaded")
|
| 68 |
-
if not embedding:
|
| 69 |
-
embedding = load_embedding_model(model_path="intfloat/multilingual-e5-large")
|
| 70 |
-
print("embedding model loaded")
|
| 71 |
|
| 72 |
results = rag_with_reranking(query)
|
| 73 |
|
|
|
|
| 29 |
reranker_model = None
|
| 30 |
embedding = None
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
def load_embedding_model(model_path : str):
|
| 33 |
start_time = time.time()
|
| 34 |
encode_kwargs = {"normalize_embeddings": True}
|
|
|
|
| 41 |
print(f'model load time {round(end_time - start_time, 0)} second')
|
| 42 |
return local_embedding
|
| 43 |
|
| 44 |
+
if not reranker_model:
|
| 45 |
+
reranker_model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-v2-m3")
|
| 46 |
+
print("reranker model loaded")
|
| 47 |
+
if not embedding:
|
| 48 |
+
embedding = load_embedding_model(model_path="intfloat/multilingual-e5-large")
|
| 49 |
+
print("embedding model loaded")
|
| 50 |
+
|
| 51 |
+
if not os.path.exists(CHROMA_PATH):
|
| 52 |
+
print("Downloading ChromaDB from Google Drive...")
|
| 53 |
+
subprocess.run(["gdown", f"https://drive.google.com/uc?id={GOOGLE_DRIVE_FILE_ID}", "-O", ZIP_FILE])
|
| 54 |
+
subprocess.run(["unzip", ZIP_FILE]) # Extract database
|
| 55 |
+
retriever = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding).as_retriever(search_kwargs={"k": 20})
|
| 56 |
+
print("ChromaDB loaded!")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
def rag_with_reranking(query : str):
|
| 60 |
compressor = CrossEncoderReranker(model=reranker_model, top_n=3)
|
| 61 |
compression_retriever = ContextualCompressionRetriever(
|
|
|
|
| 67 |
@app.get("/search")
|
| 68 |
def search_text(query):
|
| 69 |
"""Searches for similar texts."""
|
| 70 |
+
|
|
|
|
| 71 |
print(f"Searching for: {query}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
results = rag_with_reranking(query)
|
| 74 |
|