Spaces:
Sleeping
Sleeping
SYNC with v0.1.3
Browse files
visual_rag/__init__.py
CHANGED
|
@@ -31,8 +31,48 @@ Quick Start:
|
|
| 31 |
Each component works independently - use only what you need.
|
| 32 |
"""
|
| 33 |
|
|
|
|
|
|
|
| 34 |
__version__ = "0.1.3"
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
# Import main classes at package level for convenience
|
| 37 |
# These are optional - if dependencies aren't installed, we catch the error
|
| 38 |
|
|
@@ -95,4 +135,6 @@ __all__ = [
|
|
| 95 |
"load_config",
|
| 96 |
"get",
|
| 97 |
"get_section",
|
|
|
|
|
|
|
| 98 |
]
|
|
|
|
| 31 |
Each component works independently - use only what you need.
|
| 32 |
"""
|
| 33 |
|
| 34 |
+
import logging
|
| 35 |
+
|
| 36 |
__version__ = "0.1.3"
|
| 37 |
|
| 38 |
+
|
| 39 |
+
def setup_logging(level: str = "INFO", format: str = None) -> None:
    """
    Configure logging for visual_rag package.

    Sets up the root logger via ``logging.basicConfig`` and then applies the
    same level and format to the ``visual_rag`` logger and its handlers.

    Args:
        level: Log level name ("DEBUG", "INFO", "WARNING", "ERROR"),
            case-insensitive. A numeric ``logging`` level is also accepted.
            Anything that does not resolve to a level falls back to INFO.
        format: Custom format string. Default shows time, level, and message.

    Example:
        >>> import visual_rag
        >>> visual_rag.setup_logging("INFO")
        >>> # Now you'll see processing logs
    """
    if format is None:
        format = "[%(asctime)s] %(levelname)s - %(message)s"

    # Resolve the level once. Only integer attributes of ``logging`` are real
    # levels; names such as "BASIC_FORMAT" resolve to strings and would make
    # ``setLevel`` raise, so they fall back to INFO like any unknown name.
    if isinstance(level, int):
        resolved_level = level
    else:
        resolved_level = getattr(logging, str(level).upper(), None)
        if not isinstance(resolved_level, int):
            resolved_level = logging.INFO

    date_format = "%H:%M:%S"

    logging.basicConfig(
        level=resolved_level,
        format=format,
        datefmt=date_format,
    )

    # Also set the visual_rag logger specifically
    logger = logging.getLogger("visual_rag")
    logger.setLevel(resolved_level)

    # The package logger can carry its own handlers and may not propagate to
    # the root logger, in which case basicConfig alone never changes how
    # visual_rag records are rendered. Apply the requested format directly.
    formatter = logging.Formatter(format, datefmt=date_format)
    for handler in logger.handlers:
        handler.setFormatter(formatter)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# Enable INFO logging by default for the visual_rag package and all of its submodules.
|
| 67 |
+
# This ensures progress logs like "Processing PDF...", "Embedding pages..." are visible
# as soon as the package is imported.
|
| 68 |
+
_logger = logging.getLogger("visual_rag")
|
| 69 |
+
# Attach a handler only when the logger has none yet, so running this code
# again does not add a second handler.
# NOTE(review): indentation is not preserved in this view; confirm which of
# the statements below sit inside this `if`.
if not _logger.handlers:
|
| 70 |
+
_handler = logging.StreamHandler()
|
| 71 |
+
# Default package format: time and message only (no level name).
_handler.setFormatter(logging.Formatter("[%(asctime)s] %(message)s", datefmt="%H:%M:%S"))
|
| 72 |
+
_logger.addHandler(_handler)
|
| 73 |
+
_logger.setLevel(logging.INFO)
|
| 74 |
+
_logger.propagate = False # Don't duplicate to root logger
|
| 75 |
+
|
| 76 |
# Import main classes at package level for convenience
|
| 77 |
# These are optional - if dependencies aren't installed, we catch the error
|
| 78 |
|
|
|
|
| 135 |
"load_config",
|
| 136 |
"get",
|
| 137 |
"get_section",
|
| 138 |
+
# Logging
|
| 139 |
+
"setup_logging",
|
| 140 |
]
|
visual_rag/indexing/qdrant_indexer.py
CHANGED
|
@@ -381,6 +381,8 @@ class QdrantIndexer:
|
|
| 381 |
wait=wait,
|
| 382 |
)
|
| 383 |
|
|
|
|
|
|
|
| 384 |
if delay_between_batches > 0:
|
| 385 |
if _is_cancelled():
|
| 386 |
return 0
|
|
|
|
| 381 |
wait=wait,
|
| 382 |
)
|
| 383 |
|
| 384 |
+
logger.info(f" ✅ Uploaded {len(points)} points to Qdrant")
|
| 385 |
+
|
| 386 |
if delay_between_batches > 0:
|
| 387 |
if _is_cancelled():
|
| 388 |
return 0
|
visual_rag/retrieval/single_stage.py
CHANGED
|
@@ -30,6 +30,9 @@ class SingleStageRetriever:
|
|
| 30 |
Args:
|
| 31 |
qdrant_client: Connected Qdrant client
|
| 32 |
collection_name: Name of the Qdrant collection
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
Example:
|
| 35 |
>>> retriever = SingleStageRetriever(client, "my_collection")
|
|
@@ -41,10 +44,14 @@ class SingleStageRetriever:
|
|
| 41 |
qdrant_client,
|
| 42 |
collection_name: str,
|
| 43 |
request_timeout: int = 120,
|
|
|
|
|
|
|
| 44 |
):
|
| 45 |
self.client = qdrant_client
|
| 46 |
self.collection_name = collection_name
|
| 47 |
self.request_timeout = int(request_timeout)
|
|
|
|
|
|
|
| 48 |
|
| 49 |
def search(
|
| 50 |
self,
|
|
|
|
| 30 |
Args:
|
| 31 |
qdrant_client: Connected Qdrant client
|
| 32 |
collection_name: Name of the Qdrant collection
|
| 33 |
+
request_timeout: Timeout for Qdrant requests (seconds)
|
| 34 |
+
max_retries: Number of retry attempts on failure
|
| 35 |
+
retry_sleep: Sleep time between retries (seconds)
|
| 36 |
|
| 37 |
Example:
|
| 38 |
>>> retriever = SingleStageRetriever(client, "my_collection")
|
|
|
|
| 44 |
qdrant_client,
|
| 45 |
collection_name: str,
|
| 46 |
request_timeout: int = 120,
|
| 47 |
+
max_retries: int = 3,
|
| 48 |
+
retry_sleep: float = 1.0,
|
| 49 |
):
|
| 50 |
self.client = qdrant_client
|
| 51 |
self.collection_name = collection_name
|
| 52 |
self.request_timeout = int(request_timeout)
|
| 53 |
+
self.max_retries = max_retries
|
| 54 |
+
self.retry_sleep = retry_sleep
|
| 55 |
|
| 56 |
def search(
|
| 57 |
self,
|