Spaces:
Sleeping
Sleeping
Fix compatibility issues with OpenAI embeddings and Qdrant client
Browse files- streamlit_app.py +51 -7
streamlit_app.py
CHANGED
|
@@ -111,12 +111,45 @@ def get_agent_model():
|
|
| 111 |
@st.cache_resource
|
| 112 |
def get_embedding_model():
|
| 113 |
"""Get the embedding model."""
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
@st.cache_resource
|
| 117 |
def setup_qdrant_client():
|
| 118 |
"""Set up the Qdrant client."""
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
def retrieve_documents(query, k=5):
|
| 122 |
"""Retrieve relevant documents for a query."""
|
|
@@ -132,11 +165,22 @@ def retrieve_documents(query, k=5):
|
|
| 132 |
query_embedding = embedding_model.embed_query(query)
|
| 133 |
|
| 134 |
# Search Qdrant
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
# Convert results to documents
|
| 142 |
documents = []
|
|
|
|
| 111 |
@st.cache_resource
def get_embedding_model():
    """Get the embedding model.

    Builds the ``text-embedding-3-small`` OpenAI embedding model. If the
    default construction fails with an error whose message mentions
    ``proxies``, construction is retried once with the API key read
    explicitly from the ``OPENAI_API_KEY`` environment variable, the
    progress bar disabled and a 60 second request timeout.

    Returns:
        The constructed ``OpenAIEmbeddings`` instance.

    Raises:
        Exception: Any construction error whose message does not mention
            "proxies", or any error raised by the retry itself.
    """
    # ``OpenAIEmbeddings`` is deliberately NOT re-imported inside this
    # function: a function-scope import makes the name local to the whole
    # function body, so the first call below would raise UnboundLocalError
    # before the import ever ran. The module-level name is used instead.
    try:
        # First try the standard approach
        return OpenAIEmbeddings(model="text-embedding-3-small")
    except Exception as e:
        if "proxies" not in str(e):
            # Re-raise any other exceptions
            raise

        # ``os`` is only needed on this path and is only referenced after
        # this import, so a local import is safe here.
        import os

        # NOTE(review): ``client_kwargs`` is not a documented
        # OpenAIEmbeddings parameter, so the timeout is passed through the
        # supported ``timeout`` argument. If the "proxies" error comes from
        # an openai/httpx version mismatch this retry may still fail;
        # pinning compatible versions is the likely real fix - confirm.
        return OpenAIEmbeddings(
            model="text-embedding-3-small",
            openai_api_key=os.environ.get("OPENAI_API_KEY"),
            show_progress_bar=False,
            timeout=60,
        )
|
| 132 |
|
| 133 |
@st.cache_resource
def setup_qdrant_client():
    """Set up the Qdrant client.

    Opens the Qdrant store located at ``QDRANT_DIR`` via the ``path``
    argument of ``QdrantClient``. On failure, diagnostics are printed and
    the error is propagated.

    Returns:
        The ``QdrantClient`` opened on ``QDRANT_DIR``.

    Raises:
        ValueError: If initialization failed and ``QDRANT_DIR`` does not
            exist (chained to the original error).
        Exception: The original initialization error when the directory
            exists.
    """
    try:
        return QdrantClient(path=str(QDRANT_DIR))
    except Exception as e:
        # Print diagnostics so the root cause is visible in the app logs.
        print(f"QdrantClient initialization error: {str(e)}")
        directory_exists = os.path.exists(str(QDRANT_DIR))
        print(f"Checking if directory exists: {directory_exists}")

        if not directory_exists:
            raise ValueError(
                f"Qdrant directory does not exist: {str(QDRANT_DIR)}"
            ) from e

        # No retry with ``location=``: qdrant-client documents a string
        # ``location`` (other than ":memory:") as a server URL, so passing
        # the directory there would not reopen the on-disk store and could
        # hand back a client for a bogus host, hiding the real error.
        raise
|
| 153 |
|
| 154 |
def retrieve_documents(query, k=5):
|
| 155 |
"""Retrieve relevant documents for a query."""
|
|
|
|
| 165 |
query_embedding = embedding_model.embed_query(query)
|
| 166 |
|
| 167 |
# Search Qdrant
|
| 168 |
+
try:
|
| 169 |
+
# Try the new API method first
|
| 170 |
+
results = client.query_points(
|
| 171 |
+
collection_name="kohavi_ab_testing_pdf_collection",
|
| 172 |
+
query_vector=query_embedding,
|
| 173 |
+
limit=k
|
| 174 |
+
)
|
| 175 |
+
except Exception as e:
|
| 176 |
+
print(f"Error with query_points method: {str(e)}")
|
| 177 |
+
# Fall back to the deprecated method
|
| 178 |
+
results = client.search(
|
| 179 |
+
collection_name="kohavi_ab_testing_pdf_collection",
|
| 180 |
+
query_vector=query_embedding,
|
| 181 |
+
limit=k
|
| 182 |
+
)
|
| 183 |
+
print("Using deprecated search method")
|
| 184 |
|
| 185 |
# Convert results to documents
|
| 186 |
documents = []
|