Commit
·
d916930
1
Parent(s):
c7743de
Đặt biến MONGODB_DATABASE ở config và sửa nhẹ UI
Browse files
- app.py +2 -2
- config.py +3 -1
- data_indexing.py +3 -2
- data_processing.py +6 -6
app.py
CHANGED
|
@@ -23,7 +23,7 @@ def create_combined_app():
|
|
| 23 |
|
| 24 |
with gr.Tabs() as tabs:
|
| 25 |
# Tab 1: Vector Indexing (MongoDB to Qdrant)
|
| 26 |
-
with gr.Tab("🗄️
|
| 27 |
gr.Markdown("""
|
| 28 |
## Indexing dữ liệu từ MongoDB lên Qdrant
|
| 29 |
|
|
@@ -55,7 +55,7 @@ def create_combined_app():
|
|
| 55 |
2. Cấu hình MongoDB connection string, database name và test connection
|
| 56 |
3. Chọn loại sản phẩm hoặc xử lý tất cả.
|
| 57 |
|
| 58 |
-
**Bước 2:
|
| 59 |
1. Chọn collection cần indexing
|
| 60 |
2. Hệ thống sẽ tạo embeddings và đẩy lên Qdrant
|
| 61 |
|
|
|
|
| 23 |
|
| 24 |
with gr.Tabs() as tabs:
|
| 25 |
# Tab 1: Vector Indexing (MongoDB to Qdrant)
|
| 26 |
+
with gr.Tab("🗄️ Data Indexing", id="indexing"):
|
| 27 |
gr.Markdown("""
|
| 28 |
## Indexing dữ liệu từ MongoDB lên Qdrant
|
| 29 |
|
|
|
|
| 55 |
2. Cấu hình MongoDB connection string, database name và test connection
|
| 56 |
3. Chọn loại sản phẩm hoặc xử lý tất cả.
|
| 57 |
|
| 58 |
+
**Bước 2: Data Indexing**
|
| 59 |
1. Chọn collection cần indexing
|
| 60 |
2. Hệ thống sẽ tạo embeddings và đẩy lên Qdrant
|
| 61 |
|
config.py
CHANGED
|
@@ -26,6 +26,8 @@ IMAGE_EMBEDDING_MODEL = "google/efficientnet-b3"
|
|
| 26 |
IMAGE_EMBEDDING_SIZE = 1536
|
| 27 |
|
| 28 |
MONGODB_URI = os.getenv("MONGODB_URI", "mongodb://localhost:27017/")
|
|
|
|
| 29 |
|
| 30 |
QDRANT_HOST = os.getenv("QDRANT_HOST")
|
| 31 |
-
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
|
|
|
|
|
| 26 |
IMAGE_EMBEDDING_SIZE = 1536
|
| 27 |
|
| 28 |
MONGODB_URI = os.getenv("MONGODB_URI", "mongodb://localhost:27017/")
|
| 29 |
+
MONGODB_DATABASE = "dataset"
|
| 30 |
|
| 31 |
QDRANT_HOST = os.getenv("QDRANT_HOST")
|
| 32 |
+
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
| 33 |
+
|
data_indexing.py
CHANGED
|
@@ -45,7 +45,8 @@ from config import (
|
|
| 45 |
TEXT_EMBEDDING_MODEL,
|
| 46 |
MONGODB_URI,
|
| 47 |
QDRANT_HOST,
|
| 48 |
-
QDRANT_API_KEY
|
|
|
|
| 49 |
)
|
| 50 |
|
| 51 |
from data_helper import *
|
|
@@ -154,7 +155,7 @@ mongodb_solution_collections = {
|
|
| 154 |
|
| 155 |
"""=================MONGODB CONNECTION========================"""
|
| 156 |
class MongoDBConnection:
|
| 157 |
-
def __init__(self, connection_string: str = None, db_name: str =
|
| 158 |
"""
|
| 159 |
Initialize MongoDB connection
|
| 160 |
Args:
|
|
|
|
| 45 |
TEXT_EMBEDDING_MODEL,
|
| 46 |
MONGODB_URI,
|
| 47 |
QDRANT_HOST,
|
| 48 |
+
QDRANT_API_KEY,
|
| 49 |
+
MONGODB_DATABASE
|
| 50 |
)
|
| 51 |
|
| 52 |
from data_helper import *
|
|
|
|
| 155 |
|
| 156 |
"""=================MONGODB CONNECTION========================"""
|
| 157 |
class MongoDBConnection:
|
| 158 |
+
def __init__(self, connection_string: str = None, db_name: str = MONGODB_DATABASE):
|
| 159 |
"""
|
| 160 |
Initialize MongoDB connection
|
| 161 |
Args:
|
data_processing.py
CHANGED
|
@@ -24,7 +24,7 @@ from pymongo import MongoClient, UpdateOne
|
|
| 24 |
from pymongo.errors import ConnectionFailure, OperationFailure
|
| 25 |
|
| 26 |
from data_helper import *
|
| 27 |
-
from config import MONGODB_URI
|
| 28 |
|
| 29 |
# Suppress PyTorch DataLoader pin_memory warning on MPS
|
| 30 |
warnings.filterwarnings("ignore", message=".*pin_memory.*not supported on MPS.*")
|
|
@@ -32,7 +32,7 @@ warnings.filterwarnings("ignore", message=".*pin_memory.*not supported on MPS.*"
|
|
| 32 |
class MongoDBHandler:
|
| 33 |
"""Handler for MongoDB operations"""
|
| 34 |
|
| 35 |
-
def __init__(self, connection_string: str = None, database_name: str =
|
| 36 |
"""
|
| 37 |
Initialize MongoDB connection
|
| 38 |
|
|
@@ -512,7 +512,7 @@ def process_single_category(excel_path, category_name, processor_type,
|
|
| 512 |
# Initialize MongoDB handler
|
| 513 |
mongo_handler = MongoDBHandler(
|
| 514 |
connection_string=mongo_connection if mongo_connection else None,
|
| 515 |
-
database_name=mongo_database if mongo_database else
|
| 516 |
)
|
| 517 |
|
| 518 |
progress(0.3, desc=f"Processing {category_name} with {processor_type}...")
|
|
@@ -564,7 +564,7 @@ def test_mongo_connection(connection_string, database_name):
|
|
| 564 |
if not connection_string:
|
| 565 |
connection_string = "mongodb://localhost:27017/"
|
| 566 |
if not database_name:
|
| 567 |
-
database_name =
|
| 568 |
|
| 569 |
handler = MongoDBHandler(connection_string, database_name)
|
| 570 |
return handler.test_connection()
|
|
@@ -615,8 +615,8 @@ def create_processing_interface():
|
|
| 615 |
)
|
| 616 |
mongo_database = gr.Textbox(
|
| 617 |
label="Database Name",
|
| 618 |
-
placeholder="
|
| 619 |
-
value=
|
| 620 |
info="Name of the MongoDB database"
|
| 621 |
)
|
| 622 |
test_connection_btn = gr.Button("🔌 Test Connection", size="sm")
|
|
|
|
| 24 |
from pymongo.errors import ConnectionFailure, OperationFailure
|
| 25 |
|
| 26 |
from data_helper import *
|
| 27 |
+
from config import MONGODB_URI, MONGODB_DATABASE
|
| 28 |
|
| 29 |
# Suppress PyTorch DataLoader pin_memory warning on MPS
|
| 30 |
warnings.filterwarnings("ignore", message=".*pin_memory.*not supported on MPS.*")
|
|
|
|
| 32 |
class MongoDBHandler:
|
| 33 |
"""Handler for MongoDB operations"""
|
| 34 |
|
| 35 |
+
def __init__(self, connection_string: str = None, database_name: str = MONGODB_DATABASE):
|
| 36 |
"""
|
| 37 |
Initialize MongoDB connection
|
| 38 |
|
|
|
|
| 512 |
# Initialize MongoDB handler
|
| 513 |
mongo_handler = MongoDBHandler(
|
| 514 |
connection_string=mongo_connection if mongo_connection else None,
|
| 515 |
+
database_name=mongo_database if mongo_database else MONGODB_DATABASE
|
| 516 |
)
|
| 517 |
|
| 518 |
progress(0.3, desc=f"Processing {category_name} with {processor_type}...")
|
|
|
|
| 564 |
if not connection_string:
|
| 565 |
connection_string = "mongodb://localhost:27017/"
|
| 566 |
if not database_name:
|
| 567 |
+
database_name = MONGODB_DATABASE
|
| 568 |
|
| 569 |
handler = MongoDBHandler(connection_string, database_name)
|
| 570 |
return handler.test_connection()
|
|
|
|
| 615 |
)
|
| 616 |
mongo_database = gr.Textbox(
|
| 617 |
label="Database Name",
|
| 618 |
+
placeholder="MONGODB_DATABASE",
|
| 619 |
+
value=MONGODB_DATABASE,
|
| 620 |
info="Name of the MongoDB database"
|
| 621 |
)
|
| 622 |
test_connection_btn = gr.Button("🔌 Test Connection", size="sm")
|