Spaces:

MuhammadNoman7600
/

derm-ai

Runtime error

App Files Files Community

muhammadnoman76 committed on Aug 28, 2025

Commit

e02b28a

1 Parent(s): fe1a3c4

update

Browse files

Files changed (2) hide show

app/services/chat_processor.py +152 -158
app/services/vector_database_search.py +126 -36

app/services/chat_processor.py CHANGED Viewed

@@ -9,7 +9,16 @@ from app.services.environmental_condition import EnvironmentalData
 from app.services.prompts import *
 from app.services.vector_database_search import VectorDatabaseSearch
 import re
-vectordb = VectorDatabaseSearch()
 class ChatProcessor:
     def __init__(self, token: str, session_id: Optional[str] = None, num_results: int = 3, num_images: int = 3):
@@ -58,26 +67,32 @@ class ChatProcessor:
             name = profile['name']
             age = profile['age']
             self.chat_session.load_chat_history()
-            self.chat_session.update_title(self.session_id,query)
             history = self.chat_session.format_history()
-            history_based_prompt = HISTORY_BASED_PROMPT.format(history=history,query= query)
             enhanced_query = Model().send_message_openrouter(history_based_prompt)
             self.session_id = self.ensure_valid_session(title=enhanced_query)
             permission = self.chat_session.get_user_preferences()
-            websearch_enabled  = permission.get('websearch', False)
             env_recommendations = permission.get('environmental_recommendations', False)
             personalized_recommendations = permission.get('personalized_recommendations', False)
             keywords_permission = permission.get('keywords', False)
             reference_permission = permission.get('references', False)
             language = self.chat_session.get_language().lower()
-            language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language = language)
-            if websearch_enabled :
                 with ThreadPoolExecutor(max_workers=2) as executor:
                     future_web = executor.submit(self.web_searcher.search, enhanced_query)
                     future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
@@ -93,186 +108,165 @@ class ChatProcessor:
                         references.append(result['link'])
                 context = "\n".join(context_parts)
-                if env_recommendations and personalized_recommendations:
-                    prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
-                        user_name=name,
-                        user_age=age,
-                        history=history,
-                        user_details=self.chat_session.get_personalized_recommendation(),
-                        environmental_condition=self.environment_data.get_environmental_data(),
-                        previous_history=history,
-                        context=context,
-                        current_query=enhanced_query
-                    )
-                elif personalized_recommendations:
-                    prompt = PERSONALIZED_PROMPT.format(
-                        user_name=name,
-                        user_age=age,
-                        user_details=self.chat_session.get_personalized_recommendation(),
-                        previous_history=history,
-                        context=context,
-                        current_query=enhanced_query
-                    )
-                elif env_recommendations :
-                    prompt = ENVIRONMENTAL_PROMPT.format(
-                        user_name=name,
-                        user_age=age,
-                        environmental_condition=self.environment_data.get_environmental_data(),
-                        previous_history=history,
-                        context=context,
-                        current_query=enhanced_query
-                    )
-                else:
-                    prompt = DEFAULT_PROMPT.format(
-                        previous_history=history,
-                        context=context,
-                        current_query=enhanced_query
-                    )
-                prompt = prompt + language_prompt
-                response = Model().llm(prompt,enhanced_query)
-                keywords = ""
-                if (keywords_permission):
-                    keywords = self.extract_keywords_yake(response, language=language)
-                if (not reference_permission):
-                    references = ""
-                chat_data = {
-                    "query": enhanced_query,
-                    "response": response,
-                    "references": references,
-                    "page_no": "",
-                    "keywords": keywords,
-                    "images": image_results,
-                    "context": context,
-                    "timestamp": datetime.now(timezone.utc).isoformat(),
-                    "session_id": self.chat_session.session_id
-                }
-                if not self.chat_session.save_chat(chat_data):
-                    raise ValueError("Failed to save chat message")
-                return chat_data
             else:
                 attach_image = False
-                with ThreadPoolExecutor(max_workers=2) as executor:
                     future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
                     image_results = future_images.result()
                 start_time = datetime.now(timezone.utc)
-                results = vectordb.search( query=enhanced_query, top_k=3)
                 context_parts = []
                 references = []
-                seen_pages = set()
                 for result in results:
-                    confidence = result['confidence']
-                    if confidence > 60:
                         context_parts.append(f"Content: {result['content']}")
-                        page = result['page']
-                        if page not in seen_pages:  # Only append if page is not seen
-                            references.append(f"Source: {result['source']}, Page: {page}")
-                            seen_pages.add(page)
-                        attach_image = True
                 context = "\n".join(context_parts)
-                if not context or len(context) < 10:
-                    context = "There is no context found unfortunately"
-                if env_recommendations  and personalized_recommendations:
-                    prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
-                        user_name=name,
-                        user_age = age,
-                        history=history,
-                        user_details=self.chat_session.get_personalized_recommendation(),
-                        environmental_condition=self.environment_data.get_environmental_data(),
-                        previous_history=history,
-                        context=context,
-                        current_query=enhanced_query
-                    )
-                elif personalized_recommendations:
-                    prompt = PERSONALIZED_PROMPT.format(
-                        user_name=name,
-                        user_age=age,
-                        user_details=self.chat_session.get_personalized_recommendation(),
-                        previous_history=history,
-                        context=context,
-                        current_query=enhanced_query
-                    )
-                elif env_recommendations :
-                    prompt = ENVIRONMENTAL_PROMPT.format(
-                        user_name=name,
-                        user_age=age,
-                        environmental_condition=self.environment_data.get_environmental_data(),
-                        previous_history=history,
-                        context=context,
-                        current_query=enhanced_query
-                    )
-                else:
-                    prompt = DEFAULT_PROMPT.format(
-                        previous_history=history,
-                        context=context,
-                        current_query=enhanced_query
-                    )
-                prompt = prompt + language_prompt
-                response = Model().response = Model().llm(prompt,query)
                 end_time = datetime.now(timezone.utc)
                 keywords = ""
-                if (keywords_permission):
-                    keywords = self.extract_keywords_yake(response, language=language)
-                if (not reference_permission):
-                    references = ""
-                if  not attach_image:
-                    image_results = ""
-                    keywords = ""
-                chat_data = {
-                    "query": enhanced_query,
-                    "response": response,
-                    "references": references,
-                    "page_no": "",
-                    "keywords": keywords,
-                    "images": image_results,
-                    "context": context,
-                    "timestamp": datetime.now(timezone.utc).isoformat(),
-                    "session_id": self.chat_session.session_id
-                }
                 match = re.search(r'(## Personal Recommendations|## Environmental Considerations)', response)
-                if match:
-                    truncated_response = response[:match.start()].strip()
-                else:
-                    truncated_response = response
-                if not self.chat_session.save_details(session_id=self.session_id , context= context , query= enhanced_query , response=truncated_response , rag_start_time=start_time  , rag_end_time=end_time ):
-                    raise ValueError("Failed to save the RAG details")
-                if not self.chat_session.save_chat(chat_data):
-                    raise ValueError("Failed to save chat message")
-                return chat_data
         except Exception as e:
             return {
                 "error": str(e),
                 "query": query,
-                "response": "Sorry, there was an error processing your request.",
                 "timestamp": datetime.now(timezone.utc).isoformat()
             }
     def web_search(self, query: str) -> Dict[str, Any]:
-        if self.session_id and len(self.session_id) > 5:
-            return self.process_chat(query=query)
-        else:
-            return self.process_chat(query=query)

 from app.services.prompts import *
 from app.services.vector_database_search import VectorDatabaseSearch
 import re
+import logging
+logger = logging.getLogger(__name__)
+# Initialize vector database with error handling
+# If constructing VectorDatabaseSearch raises, the error is logged and
+# vectordb is set to None so that importing this module still succeeds;
+# code that uses vectordb must therefore handle the None case.
+try:
+    vectordb = VectorDatabaseSearch()
+except Exception as e:
+    logger.error(f"Failed to initialize vector database: {e}")
+    vectordb = None
 class ChatProcessor:
     def __init__(self, token: str, session_id: Optional[str] = None, num_results: int = 3, num_images: int = 3):
             name = profile['name']
             age = profile['age']
             self.chat_session.load_chat_history()
+            self.chat_session.update_title(self.session_id, query)
             history = self.chat_session.format_history()
+            # Enhanced query generation
+            history_based_prompt = HISTORY_BASED_PROMPT.format(history=history, query=query)
             enhanced_query = Model().send_message_openrouter(history_based_prompt)
             self.session_id = self.ensure_valid_session(title=enhanced_query)
             permission = self.chat_session.get_user_preferences()
+            websearch_enabled = permission.get('websearch', False)
             env_recommendations = permission.get('environmental_recommendations', False)
             personalized_recommendations = permission.get('personalized_recommendations', False)
             keywords_permission = permission.get('keywords', False)
             reference_permission = permission.get('references', False)
             language = self.chat_session.get_language().lower()
+            language_prompt = LANGUAGE_RESPONSE_PROMPT.format(language=language)
+            # Check if vector database is available when websearch is disabled
+            vector_db_available = vectordb and vectordb.is_available() if not websearch_enabled else False
+            # If websearch is disabled and vector DB is not available, enable websearch as fallback
+            use_websearch = websearch_enabled or not vector_db_available
+            if use_websearch:
+                logger.info("Using web search for context")
                 with ThreadPoolExecutor(max_workers=2) as executor:
                     future_web = executor.submit(self.web_searcher.search, enhanced_query)
                     future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
                         references.append(result['link'])
                 context = "\n".join(context_parts)
+                # If web search returns no results, provide a helpful context
+                if not context:
+                    context = "No specific information found. Please provide general dermatological advice based on your expertise."
             else:
+                logger.info("Using vector database for context")
                 attach_image = False
+                with ThreadPoolExecutor(max_workers=1) as executor:
                     future_images = executor.submit(self.web_searcher.search_images, enhanced_query)
                     image_results = future_images.result()
                 start_time = datetime.now(timezone.utc)
+                # Search vector database
+                if vectordb:
+                    results = vectordb.search(query=enhanced_query, top_k=5)  # Increased top_k for better results
+                else:
+                    results = []
                 context_parts = []
                 references = []
+                seen_pages = set()
                 for result in results:
+                    confidence = result.get('confidence', 0)
+                    # Lowered confidence threshold for better recall
+                    if confidence > 30:
                         context_parts.append(f"Content: {result['content']}")
+                        source = result.get('source', 'Unknown')
+                        page = result.get('page', 0)
+                        page_key = f"{source}_{page}"
+                        if page_key not in seen_pages:
+                            references.append(f"Source: {source}, Page: {page}")
+                            seen_pages.add(page_key)
+                        attach_image = True
                 context = "\n".join(context_parts)
+                # Provide more helpful context when vector search returns nothing
+                if not context or len(context) < 50:
+                    logger.warning("Vector database returned insufficient context")
+                    # Fall back to web search if available
+                    if self.web_searcher:
+                        logger.info("Falling back to web search due to insufficient vector results")
+                        web_results = self.web_searcher.search(enhanced_query)
+                        context_parts = []
+                        references = []
+                        for idx, result in enumerate(web_results[:3], 1):
+                            if result['text']:
+                                context_parts.append(f"From Source {idx}: {result['text']}\n")
+                                references.append(result['link'])
+                        context = "\n".join(context_parts)
+                    if not context:
+                        context = "Based on general dermatological knowledge and best practices."
+                        attach_image = False
                 end_time = datetime.now(timezone.utc)
+            # Generate appropriate prompt based on user preferences
+            if env_recommendations and personalized_recommendations:
+                prompt = ENVIRONMENTAL_PERSONALIZED_PROMPT.format(
+                    user_name=name,
+                    user_age=age,
+                    history=history,
+                    user_details=self.chat_session.get_personalized_recommendation(),
+                    environmental_condition=self.environment_data.get_environmental_data(),
+                    previous_history=history,
+                    context=context,
+                    current_query=enhanced_query
+                )
+            elif personalized_recommendations:
+                prompt = PERSONALIZED_PROMPT.format(
+                    user_name=name,
+                    user_age=age,
+                    user_details=self.chat_session.get_personalized_recommendation(),
+                    previous_history=history,
+                    context=context,
+                    current_query=enhanced_query
+                )
+            elif env_recommendations:
+                prompt = ENVIRONMENTAL_PROMPT.format(
+                    user_name=name,
+                    user_age=age,
+                    environmental_condition=self.environment_data.get_environmental_data(),
+                    previous_history=history,
+                    context=context,
+                    current_query=enhanced_query
+                )
+            else:
+                prompt = DEFAULT_PROMPT.format(
+                    previous_history=history,
+                    context=context,
+                    current_query=enhanced_query
+                )
+            prompt = prompt + "\n" + language_prompt
+            # Generate response
+            response = Model().llm(prompt, enhanced_query)
+            # Extract keywords if enabled
+            keywords = ""
+            if keywords_permission:
+                keywords = self.extract_keywords_yake(response, language=language)
+            if not reference_permission:
+                references = ""
+            # Prepare images
+            if not use_websearch and not attach_image:
+                image_results = ""
                 keywords = ""
+            # Prepare chat data
+            chat_data = {
+                "query": enhanced_query,
+                "response": response,
+                "references": references,
+                "page_no": "",
+                "keywords": keywords,
+                "images": image_results if 'image_results' in locals() else "",
+                "context": context,
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+                "session_id": self.chat_session.session_id
+            }
+            # Save RAG details if using vector database
+            if not use_websearch and 'start_time' in locals() and 'end_time' in locals():
                 match = re.search(r'(## Personal Recommendations|## Environmental Considerations)', response)
+                truncated_response = response[:match.start()].strip() if match else response
+                if not self.chat_session.save_details(
+                    session_id=self.session_id,
+                    context=context,
+                    query=enhanced_query,
+                    response=truncated_response,
+                    rag_start_time=start_time,
+                    rag_end_time=end_time
+                ):
+                    logger.warning("Failed to save RAG details")
+            # Save chat
+            if not self.chat_session.save_chat(chat_data):
+                raise ValueError("Failed to save chat message")
+            return chat_data
         except Exception as e:
+            logger.error(f"Error in process_chat: {str(e)}")
             return {
                 "error": str(e),
                 "query": query,
+                "response": "I apologize, but I'm experiencing technical difficulties. Please try again or enable web search in your preferences for better results.",
                 "timestamp": datetime.now(timezone.utc).isoformat()
             }
     def web_search(self, query: str) -> Dict[str, Any]:
+        """Public method for web search endpoint.
+
+        Delegates directly to process_chat with the same query and returns
+        its result unchanged.
+        """
+        return self.process_chat(query=query)

app/services/vector_database_search.py CHANGED Viewed

@@ -5,33 +5,81 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_google_genai import GoogleGenerativeAIEmbeddings
 from langchain_qdrant import Qdrant
 from qdrant_client import QdrantClient, models
 from dotenv import load_dotenv
 load_dotenv()
 os.environ["GOOGLE_API_KEY"] = os.getenv("GEMINI_API_KEY")
 QDRANT_URL = os.getenv("QDRANT_URL")
 QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
-QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME")
 class VectorDatabaseSearch:
     def __init__(self, collection_name=QDRANT_COLLECTION_NAME):
         self.collection_name = collection_name
         self.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
-        self.client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
-        self._initialize_collection()
-        self.vectorstore = Qdrant(
-            client=self.client,
-            collection_name=collection_name,
-            embeddings=self.embeddings
-        )
     def _initialize_collection(self):
         """Initialize Qdrant collection if it doesn't exist"""
         try:
             collections = self.client.get_collections()
-            if not any(c.name == self.collection_name for c in collections.collections):
                 self.client.create_collection(
                     collection_name=self.collection_name,
                     vectors_config=models.VectorParams(
@@ -39,12 +87,22 @@ class VectorDatabaseSearch:
                         distance=models.Distance.COSINE
                     )
                 )
-                print(f"Created collection: {self.collection_name}")
         except Exception as e:
-            print(f"Error initializing collection: {e}")
     def add_pdf(self, pdf_path):
         """Add PDF to vector database"""
         try:
             loader = PyPDFLoader(pdf_path)
             docs = loader.load()
@@ -52,75 +110,107 @@ class VectorDatabaseSearch:
             split_docs = splitter.split_documents(docs)
             book_name = os.path.splitext(os.path.basename(pdf_path))[0]
-            print(f"Processing {book_name} with {len(split_docs)} chunks")
             for doc in split_docs:
-                # Ensure metadata is stored in a consistent way
                 doc.metadata = {
                     "source": book_name,
                     "page": doc.metadata.get('page', 1),
                     "id": str(uuid.uuid4())
                 }
-            # Add documents to vector store
             self.vectorstore.add_documents(split_docs)
-            print(f"Added {len(split_docs)} chunks from {book_name}")
             return True
         except Exception as e:
-            print(f"Error adding PDF: {e}")
             return False
     def search(self, query, top_k=5):
         """Search documents based on query"""
         try:
             results = self.vectorstore.similarity_search_with_score(query, k=top_k)
             formatted = []
             for doc, score in results:
                 formatted.append({
-                    "source": doc.metadata['source'],
-                    "page": doc.metadata['page'],
                     "content": doc.page_content[:500],
-                    "confidence": round(score * 100, 2)
                 })
             return formatted
         except Exception as e:
-            print(f"Search error: {e}")
             return []
     def get_book_info(self):
         """Retrieve list of unique book sources in the collection"""
         try:
-            # First check if the collection exists
             collections = self.client.get_collections()
             if not any(c.name == self.collection_name for c in collections.collections):
-                print(f"Collection {self.collection_name} does not exist yet")
                 return []
-            # Get all points with payload from the collection
             points = self.client.scroll(
                 collection_name=self.collection_name,
-                limit=1000,
                 with_payload=True,
-                with_vectors=False  # We don't need vector data
             )[0]
-            # Debug information
-            print(f"Retrieved {len(points)} points from collection")
-            # Extract unique book sources from payloads
             books = set()
             for point in points:
-                # Check if payload exists and has 'metadata' field with 'source'
                 if hasattr(point, 'payload') and point.payload:
-                    # Check different possible payload structures
                     if 'metadata' in point.payload and 'source' in point.payload['metadata']:
                         books.add(point.payload['metadata']['source'])
                     elif 'source' in point.payload:
                         books.add(point.payload['source'])
-            print(f"Found {len(books)} unique books")
             return list(books)
         except Exception as e:
-            print(f"Error retrieving book info: {e}")
-            return []

 from langchain_google_genai import GoogleGenerativeAIEmbeddings
 from langchain_qdrant import Qdrant
 from qdrant_client import QdrantClient, models
+from qdrant_client.http.exceptions import UnexpectedResponse
 from dotenv import load_dotenv
+import logging
 load_dotenv()
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# os.environ only accepts str values, so assigning the result of os.getenv()
+# directly raises TypeError at import time when GEMINI_API_KEY is unset.
+# Export the key only when it is present and log the problem otherwise.
+_gemini_api_key = os.getenv("GEMINI_API_KEY")
+if _gemini_api_key:
+    os.environ["GOOGLE_API_KEY"] = _gemini_api_key
+else:
+    logger.warning("GEMINI_API_KEY is not set; GOOGLE_API_KEY was not configured.")
 QDRANT_URL = os.getenv("QDRANT_URL")
 QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
+QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "dermatology_docs")
 class VectorDatabaseSearch:
     def __init__(self, collection_name=QDRANT_COLLECTION_NAME):
+        """Set up the embedding model and attempt to connect to Qdrant.
+
+        Args:
+            collection_name: Name of the Qdrant collection to use; defaults to
+                the module-level QDRANT_COLLECTION_NAME.
+        """
         self.collection_name = collection_name
         self.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+        # Defaults for the not-connected state; _initialize_connection
+        # overwrites them as it progresses.
+        self.client = None
+        self.vectorstore = None
+        self.is_initialized = False
+        # Initialize connection
+        self._initialize_connection()
+    def _initialize_connection(self):
+        """Initialize Qdrant connection with proper error handling.
+
+        On success ``self.client`` and ``self.vectorstore`` are populated and
+        ``self.is_initialized`` is set to True. Any failure is logged and
+        leaves ``self.is_initialized`` False; no exception reaches the caller.
+        """
+        try:
+            # Check if credentials are available
+            if not QDRANT_URL or not QDRANT_API_KEY:
+                logger.warning("Qdrant credentials not found. Vector search will be disabled.")
+                self.is_initialized = False
+                return
+            # Initialize Qdrant client
+            self.client = QdrantClient(
+                url=QDRANT_URL,
+                api_key=QDRANT_API_KEY,
+                timeout=30  # Add timeout
+            )
+            # Test connection
+            self.client.get_collections()
+            # Initialize collection
+            self._initialize_collection()
+            # _initialize_collection logs and swallows its own errors, so
+            # confirm the collection is really reachable before reporting
+            # success; this call raises when it is not and is handled below.
+            self.client.get_collection(self.collection_name)
+            # Initialize vector store
+            self.vectorstore = Qdrant(
+                client=self.client,
+                collection_name=self.collection_name,
+                embeddings=self.embeddings
+            )
+            self.is_initialized = True
+            logger.info(f"Successfully connected to Qdrant collection: {self.collection_name}")
+        except UnexpectedResponse as e:
+            # Raised for any non-success HTTP reply (bad credentials, missing
+            # collection, ...), so the message must not claim it is auth-only.
+            logger.error(f"Unexpected response from Qdrant (check URL, API key and collection): {e}")
+            self.is_initialized = False
+        except Exception as e:
+            logger.error(f"Error initializing Qdrant connection: {e}")
+            self.is_initialized = False
     def _initialize_collection(self):
         """Initialize Qdrant collection if it doesn't exist"""
+        if not self.client:
+            return
         try:
             collections = self.client.get_collections()
+            collection_exists = any(c.name == self.collection_name for c in collections.collections)
+            if not collection_exists:
                 self.client.create_collection(
                     collection_name=self.collection_name,
                     vectors_config=models.VectorParams(
                         distance=models.Distance.COSINE
                     )
                 )
+                logger.info(f"Created new collection: {self.collection_name}")
+            else:
+                # Check if collection has data
+                collection_info = self.client.get_collection(self.collection_name)
+                logger.info(f"Collection {self.collection_name} exists with {collection_info.points_count} points")
         except Exception as e:
+            logger.error(f"Error initializing collection: {e}")
+            self.is_initialized = False
     def add_pdf(self, pdf_path):
         """Add PDF to vector database"""
+        if not self.is_initialized:
+            logger.error("Vector database not initialized. Cannot add PDF.")
+            return False
         try:
             loader = PyPDFLoader(pdf_path)
             docs = loader.load()
             split_docs = splitter.split_documents(docs)
             book_name = os.path.splitext(os.path.basename(pdf_path))[0]
+            logger.info(f"Processing {book_name} with {len(split_docs)} chunks")
             for doc in split_docs:
                 doc.metadata = {
                     "source": book_name,
                     "page": doc.metadata.get('page', 1),
                     "id": str(uuid.uuid4())
                 }
             self.vectorstore.add_documents(split_docs)
+            logger.info(f"Successfully added {len(split_docs)} chunks from {book_name}")
             return True
         except Exception as e:
+            logger.error(f"Error adding PDF: {e}")
             return False
     def search(self, query, top_k=5):
+        """Search documents based on query.
+
+        Args:
+            query: Text to embed and look up.
+            top_k: Maximum number of matches to request.
+
+        Returns:
+            A list of dicts with "source", "page", "content" (first 500
+            characters of the chunk) and "confidence" (the score expressed as
+            a percentage). Empty when the database is not initialized, the
+            collection holds no points, or the search raises.
+        """
+        if not self.is_initialized:
+            logger.warning("Vector database not initialized. Returning empty results.")
+            return []
         try:
+            # Check if collection has any data
+            collection_info = self.client.get_collection(self.collection_name)
+            if collection_info.points_count == 0:
+                logger.warning(f"Collection {self.collection_name} is empty. No documents to search.")
+                return []
+            # Perform similarity search
             results = self.vectorstore.similarity_search_with_score(query, k=top_k)
             formatted = []
             for doc, score in results:
+                # Qdrant reports a similarity score for cosine collections
+                # (higher means closer), so it maps straight onto a
+                # percentage; subtracting it from 1 would give the best
+                # matches the lowest confidence.
+                confidence = score * 100
                 formatted.append({
+                    "source": doc.metadata.get('source', 'Unknown'),
+                    "page": doc.metadata.get('page', 0),
                     "content": doc.page_content[:500],
+                    "confidence": round(confidence, 2)
                 })
+            logger.info(f"Found {len(formatted)} results for query: {query[:50]}...")
             return formatted
         except Exception as e:
+            logger.error(f"Search error: {e}")
             return []
     def get_book_info(self):
+        """Retrieve list of unique book sources in the collection.
+
+        Returns:
+            A list of distinct "source" values found in point payloads, read
+            from payload["metadata"]["source"] or, failing that,
+            payload["source"]. Empty when the database is not initialized, the
+            collection is missing or empty, or any Qdrant call raises.
+        """
+        if not self.is_initialized:
+            logger.warning("Vector database not initialized.")
+            return []
         try:
+            # Check if collection exists
             collections = self.client.get_collections()
             if not any(c.name == self.collection_name for c in collections.collections):
+                logger.info(f"Collection {self.collection_name} does not exist yet")
                 return []
+            # Get collection info
+            collection_info = self.client.get_collection(self.collection_name)
+            if collection_info.points_count == 0:
+                logger.info("Collection is empty")
+                return []
             books = set()
+            # Page through the whole collection: a single scroll call returns
+            # at most `limit` points, so stopping after one page would drop
+            # every source stored beyond the first 1000 points.
+            next_offset = None
+            while True:
+                points, next_offset = self.client.scroll(
+                    collection_name=self.collection_name,
+                    limit=1000,
+                    offset=next_offset,
+                    with_payload=True,
+                    with_vectors=False
+                )
+                for point in points:
+                    # Tolerate points with no payload or a null metadata field.
+                    payload = getattr(point, 'payload', None) or {}
+                    metadata = payload.get('metadata') or {}
+                    if 'source' in metadata:
+                        books.add(metadata['source'])
+                    elif 'source' in payload:
+                        books.add(payload['source'])
+                # scroll reports None as the next offset after the last page.
+                if next_offset is None:
+                    break
+            logger.info(f"Found {len(books)} unique books in collection")
             return list(books)
         except Exception as e:
+            logger.error(f"Error retrieving book info: {e}")
+            return []
+    def is_available(self):
+        """Check if vector database is available and has data.
+
+        Returns:
+            True only when the connection was initialized and the collection
+            reports at least one point; False otherwise, including when the
+            lookup raises.
+        """
+        if not self.is_initialized:
+            return False
+        try:
+            collection_info = self.client.get_collection(self.collection_name)
+            return collection_info.points_count > 0
+        except Exception as e:
+            # Catch Exception rather than using a bare except so that
+            # KeyboardInterrupt/SystemExit still propagate, and record why the
+            # database was judged unavailable.
+            logger.warning(f"Vector database availability check failed: {e}")
+            return False