Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,6 +21,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
| 21 |
from langchain_community.vectorstores import Cassandra
|
| 22 |
from langchain_openai import OpenAIEmbeddings
|
| 23 |
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
| 24 |
|
| 25 |
# Load environment variables
|
| 26 |
load_dotenv()
|
|
@@ -79,54 +80,20 @@ def init_astra_db():
|
|
| 79 |
astra_db_region = os.getenv("ASTRA_DB_REGION")
|
| 80 |
astra_db_keyspace = os.getenv("ASTRA_DB_KEYSPACE")
|
| 81 |
astra_db_application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
|
| 82 |
-
astra_db_endpoint = os.getenv("ASTRA_DB_ENDPOINT","https://8e3fd85c-5f28-4e1f-8538-9dd28a3ea2b0-us-east-2.apps.astra.datastax.com")
|
| 83 |
|
| 84 |
# Initialize the client
|
| 85 |
-
client = DataAPIClient(
|
| 86 |
db = client.get_database_by_api_endpoint(
|
| 87 |
astra_db_endpoint,
|
| 88 |
keyspace=astra_db_keyspace
|
| 89 |
)
|
| 90 |
|
| 91 |
-
# Create keyspace if it doesn't exist
|
| 92 |
-
session.execute(f"""
|
| 93 |
-
CREATE KEYSPACE IF NOT EXISTS {astra_db_keyspace}
|
| 94 |
-
WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': '3'}}
|
| 95 |
-
""")
|
| 96 |
-
|
| 97 |
-
# Create table for vector embeddings if it doesn't exist
|
| 98 |
-
session.execute(f"""
|
| 99 |
-
CREATE TABLE IF NOT EXISTS {astra_db_keyspace}.product_embeddings (
|
| 100 |
-
id text PRIMARY KEY,
|
| 101 |
-
product_type text,
|
| 102 |
-
content text,
|
| 103 |
-
embedding_vector list<float>,
|
| 104 |
-
metadata text
|
| 105 |
-
)
|
| 106 |
-
""")
|
| 107 |
-
|
| 108 |
# Get or create collections
|
| 109 |
product_embeddings = db.get_collection("product_embeddings")
|
| 110 |
query_analytics = db.get_collection("query_analytics")
|
| 111 |
product_images = db.get_collection("product_images")
|
| 112 |
|
| 113 |
-
# Create vector index for product_embeddings if it doesn't exist
|
| 114 |
-
try:
|
| 115 |
-
# Check if vector search is already set up
|
| 116 |
-
vector_indexes = product_embeddings.get_vector_indexes()
|
| 117 |
-
has_vector_index = any(index.get("name") == "embedding_vector_index" for index in vector_indexes)
|
| 118 |
-
|
| 119 |
-
if not has_vector_index:
|
| 120 |
-
# Create vector index
|
| 121 |
-
product_embeddings.create_vector_index(
|
| 122 |
-
vector_field="$vector",
|
| 123 |
-
dimension=1536, # Adjust dimension based on your embedding model
|
| 124 |
-
index_name="embedding_vector_index"
|
| 125 |
-
)
|
| 126 |
-
print("Vector index created for product_embeddings collection")
|
| 127 |
-
except Exception as e:
|
| 128 |
-
print(f"Warning: Could not create vector index: {e}")
|
| 129 |
-
|
| 130 |
print(f"Connected to Astra DB: {db.list_collection_names()}")
|
| 131 |
|
| 132 |
# Return DB client and collections for use in application
|
|
@@ -681,7 +648,14 @@ def setup_and_update():
|
|
| 681 |
|
| 682 |
# Initialize database and other services
|
| 683 |
global astra_session, astra_keyspace, s3_client, embeddings_model
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
s3_client = init_s3_client()
|
| 686 |
embeddings_model = get_embeddings_model()
|
| 687 |
|
|
|
|
| 21 |
from langchain_community.vectorstores import Cassandra
|
| 22 |
from langchain_openai import OpenAIEmbeddings
|
| 23 |
from PIL import Image, ImageDraw, ImageFont
|
| 24 |
+
from astrapy.db import AstraDB as DataAPIClient
|
| 25 |
|
| 26 |
# Load environment variables
|
| 27 |
load_dotenv()
|
|
|
|
| 80 |
astra_db_region = os.getenv("ASTRA_DB_REGION")
|
| 81 |
astra_db_keyspace = os.getenv("ASTRA_DB_KEYSPACE")
|
| 82 |
astra_db_application_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
|
| 83 |
+
astra_db_endpoint = os.getenv("ASTRA_DB_ENDPOINT", "https://8e3fd85c-5f28-4e1f-8538-9dd28a3ea2b0-us-east-2.apps.astra.datastax.com")
|
| 84 |
|
| 85 |
# Initialize the client
|
| 86 |
+
client = DataAPIClient(astra_db_application_token)
|
| 87 |
db = client.get_database_by_api_endpoint(
|
| 88 |
astra_db_endpoint,
|
| 89 |
keyspace=astra_db_keyspace
|
| 90 |
)
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
# Get or create collections
|
| 93 |
product_embeddings = db.get_collection("product_embeddings")
|
| 94 |
query_analytics = db.get_collection("query_analytics")
|
| 95 |
product_images = db.get_collection("product_images")
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
print(f"Connected to Astra DB: {db.list_collection_names()}")
|
| 98 |
|
| 99 |
# Return DB client and collections for use in application
|
|
|
|
| 648 |
|
| 649 |
# Initialize database and other services
|
| 650 |
global astra_session, astra_keyspace, s3_client, embeddings_model
|
| 651 |
+
astra_db_result = init_astra_db()
|
| 652 |
+
if astra_db_result:
|
| 653 |
+
astra_session = astra_db_result.get("db")
|
| 654 |
+
astra_keyspace = astra_db_result.get("keyspace")
|
| 655 |
+
else:
|
| 656 |
+
astra_session = None
|
| 657 |
+
astra_keyspace = None
|
| 658 |
+
|
| 659 |
s3_client = init_s3_client()
|
| 660 |
embeddings_model = get_embeddings_model()
|
| 661 |
|