Spaces:
Running
Running
manpreet88 committed on
Commit ·
f39b235
1
Parent(s): 30fd755
Update rag_pipeline.py
Browse files
PolyAgent/rag_pipeline.py
CHANGED
|
@@ -65,7 +65,7 @@ TARGET_EPMC = 200
|
|
| 65 |
TARGET_DATABASES = 100
|
| 66 |
|
| 67 |
# --------------------------------------------------------------------------------------
|
| 68 |
-
# Polymer keywords
|
| 69 |
# --------------------------------------------------------------------------------------
|
| 70 |
POLYMER_KEYWORDS = [
|
| 71 |
"polymer",
|
|
@@ -812,7 +812,7 @@ def fetch_polymer_journal_pdfs(
|
|
| 812 |
|
| 813 |
|
| 814 |
# --------------------------------------------------------------------------------------
|
| 815 |
-
# WRAPPER FOR OPENAI EMBEDDINGS
|
| 816 |
# --------------------------------------------------------------------------------------
|
| 817 |
class PolymerStyleOpenAIEmbeddings(OpenAIEmbeddings):
|
| 818 |
"""
|
|
@@ -950,9 +950,6 @@ def _split_and_build_retriever(
|
|
| 950 |
print(f"→ Using OpenAI embeddings model: {embedding_model}")
|
| 951 |
embeddings = PolymerStyleOpenAIEmbeddings(model=embedding_model, api_key=api_key)
|
| 952 |
|
| 953 |
-
# --------------------------------------------------------------------------------------
|
| 954 |
-
# CRITICAL FIX: Delete existing DB if it exists to prevent dimension mismatch
|
| 955 |
-
# --------------------------------------------------------------------------------------
|
| 956 |
if vector_backend.lower() == "chroma":
|
| 957 |
if persist_dir and os.path.exists(persist_dir):
|
| 958 |
print(f"→ Deleting existing Chroma database at {persist_dir} to prevent dimension mismatch...")
|
|
@@ -994,8 +991,6 @@ def _split_and_build_retriever(
|
|
| 994 |
|
| 995 |
time.sleep(0.5) # Small delay to avoid rate limiting
|
| 996 |
|
| 997 |
-
print("→ All batches embedded and persisted!")
|
| 998 |
-
|
| 999 |
elif vector_backend.lower() == "faiss":
|
| 1000 |
try:
|
| 1001 |
from langchain_community.vectorstores import FAISS
|
|
|
|
| 65 |
TARGET_DATABASES = 100
|
| 66 |
|
| 67 |
# --------------------------------------------------------------------------------------
|
| 68 |
+
# Polymer keywords
|
| 69 |
# --------------------------------------------------------------------------------------
|
| 70 |
POLYMER_KEYWORDS = [
|
| 71 |
"polymer",
|
|
|
|
| 812 |
|
| 813 |
|
| 814 |
# --------------------------------------------------------------------------------------
|
| 815 |
+
# WRAPPER FOR OPENAI EMBEDDINGS
|
| 816 |
# --------------------------------------------------------------------------------------
|
| 817 |
class PolymerStyleOpenAIEmbeddings(OpenAIEmbeddings):
|
| 818 |
"""
|
|
|
|
| 950 |
print(f"→ Using OpenAI embeddings model: {embedding_model}")
|
| 951 |
embeddings = PolymerStyleOpenAIEmbeddings(model=embedding_model, api_key=api_key)
|
| 952 |
|
|
|
|
|
|
|
|
|
|
| 953 |
if vector_backend.lower() == "chroma":
|
| 954 |
if persist_dir and os.path.exists(persist_dir):
|
| 955 |
print(f"→ Deleting existing Chroma database at {persist_dir} to prevent dimension mismatch...")
|
|
|
|
| 991 |
|
| 992 |
time.sleep(0.5) # Small delay to avoid rate limiting
|
| 993 |
|
|
|
|
|
|
|
| 994 |
elif vector_backend.lower() == "faiss":
|
| 995 |
try:
|
| 996 |
from langchain_community.vectorstores import FAISS
|