Refactored vectorstore path to include document type (doc_type)
Browse files- app/page_speed/services.py +1 -5
- app/rag/routes.py +72 -108
- app/rag/utils.py +57 -100
app/page_speed/services.py
CHANGED
|
@@ -126,11 +126,7 @@ class PageSpeedService:
|
|
| 126 |
"""
|
| 127 |
logger.debug("Building Gemini analysis prompt from PageSpeed data.")
|
| 128 |
return f"""
|
| 129 |
-
|
| 130 |
-
You are an **Expert Web Performance Optimization Consultant**. The following JSON `{pagespeed_data}` contains exactly these keys (all required):
|
| 131 |
-
=======
|
| 132 |
-
You are an **Expert Web Performance Optimization Consultant**. The following JSON `{{pagespeed_data}}` includes detailed website performance metrics from Google PageSpeed Insights.
|
| 133 |
-
>>>>>>> 574c6ac (Update endpoints):app/page_speed/services.py
|
| 134 |
|
| 135 |
Your task is to analyze this data and generate a human-friendly performance **report in plain English**. The report will be read by a **non-technical business owner**, so keep it understandable while explaining technical concepts briefly when necessary.
|
| 136 |
|
|
|
|
| 126 |
"""
|
| 127 |
logger.debug("Building Gemini analysis prompt from PageSpeed data.")
|
| 128 |
return f"""
|
| 129 |
+
You are an **Expert Web Performance Optimization Consultant**. The following JSON page speed data includes detailed website performance metrics from Google PageSpeed Insights.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
Your task is to analyze this data and generate a human-friendly performance **report in plain English**. The report will be read by a **non-technical business owner**, so keep it understandable while explaining technical concepts briefly when necessary.
|
| 132 |
|
app/rag/routes.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
-
from fastapi import APIRouter, HTTPException
|
| 4 |
|
| 5 |
from .schemas import SetupRequest, ChatRequest, SetupResponse, ChatResponse
|
| 6 |
from .utils import (
|
| 7 |
-
|
| 8 |
text_splitter,
|
| 9 |
embeddings,
|
| 10 |
save_vectorstore_to_disk,
|
| 11 |
upsert_vectorstore_metadata,
|
| 12 |
-
|
| 13 |
build_rag_chain
|
| 14 |
)
|
| 15 |
from .chat_history import ChatHistoryManager
|
|
@@ -17,139 +17,103 @@ from .logging_config import logger
|
|
| 17 |
|
| 18 |
router = APIRouter(prefix="/rag", tags=["rag"])
|
| 19 |
|
| 20 |
-
@router.post("/initialization/{onboarding_id}", response_model=SetupResponse)
|
| 21 |
-
async def setup_rag_session(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
"""
|
| 23 |
-
|
| 24 |
-
- If vectorstore exists for
|
| 25 |
- Always create a new chat_id for this session.
|
| 26 |
"""
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
| 30 |
logger.info(
|
| 31 |
-
"Vectorstore exists for onboarding_id=%s
|
| 32 |
-
onboarding_id,
|
| 33 |
)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
if chat_session:
|
| 39 |
-
chat_id = str(chat_session["chat_id"])
|
| 40 |
-
logger.info("Using existing chat session id=%s for onboarding_id=%s", chat_id, onboarding_id)
|
| 41 |
else:
|
| 42 |
-
|
| 43 |
-
|
|
|
|
| 44 |
return SetupResponse(
|
| 45 |
success=True,
|
| 46 |
message="RAG setup completed with existing vectorstore.",
|
| 47 |
onboarding_id=onboarding_id,
|
|
|
|
| 48 |
chat_id=chat_id,
|
| 49 |
-
vectorstore_path=
|
| 50 |
)
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
raise HTTPException(
|
| 58 |
-
status_code=400,
|
| 59 |
-
detail="Vectorstore does not exist; please provide documents to ingest."
|
| 60 |
-
)
|
| 61 |
-
|
| 62 |
-
# Create new chat session
|
| 63 |
-
chat_id = str(uuid.uuid4())
|
| 64 |
-
ChatHistoryManager.create_session(chat_id)
|
| 65 |
-
logger.info(
|
| 66 |
-
"Created new chat session %s for onboarding_id=%s",
|
| 67 |
-
chat_id, onboarding_id
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
-
# Ingest new vectorstore
|
| 71 |
-
all_text = "\n\n".join(body.documents)
|
| 72 |
-
text_chunks = text_splitter.split_text(all_text)
|
| 73 |
-
logger.info("Split into %d chunks for ingestion", len(text_chunks))
|
| 74 |
-
from langchain.vectorstores import FAISS as _FAISS
|
| 75 |
-
vs = _FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
| 76 |
-
vs_path = save_vectorstore_to_disk(vs, onboarding_id)
|
| 77 |
-
logger.info("Saved FAISS index to %s", vs_path)
|
| 78 |
-
upsert_vectorstore_metadata(onboarding_id, vs_path, chat_id)
|
| 79 |
-
logger.info(
|
| 80 |
-
"Upserted vectorstore metadata for onboarding_id=%s", onboarding_id
|
| 81 |
)
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
return SetupResponse(
|
| 85 |
success=True,
|
| 86 |
message="RAG setup completed.",
|
| 87 |
onboarding_id=onboarding_id,
|
|
|
|
| 88 |
chat_id=chat_id,
|
| 89 |
vectorstore_path=vs_path
|
| 90 |
)
|
| 91 |
|
| 92 |
-
@router.post("/chat/{onboarding_id}/{chat_id}", response_model=ChatResponse)
|
| 93 |
-
async def chat_with_user(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
"""
|
| 95 |
-
Chat endpoint
|
| 96 |
-
- Validates that the vectorstore exists for onboarding_id.
|
| 97 |
-
- Validates that the chat session exists.
|
| 98 |
"""
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
logger.error("Vectorstore not found for onboarding_id=%s", onboarding_id)
|
| 103 |
-
raise HTTPException(
|
| 104 |
-
status_code=400,
|
| 105 |
-
detail="Vectorstore not found for this onboarding_id. Please run /setup first."
|
| 106 |
-
)
|
| 107 |
|
| 108 |
-
# 1. Ensure chat session exists
|
| 109 |
if not ChatHistoryManager.chat_exists(chat_id):
|
| 110 |
-
|
| 111 |
-
raise HTTPException(
|
| 112 |
-
status_code=404,
|
| 113 |
-
detail=f"Chat session {chat_id} does not exist."
|
| 114 |
-
)
|
| 115 |
|
| 116 |
question = body.question.strip()
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
try:
|
| 120 |
-
# Summarize long histories
|
| 121 |
-
ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
|
| 122 |
-
|
| 123 |
-
# Record the user message
|
| 124 |
-
ChatHistoryManager.add_message(chat_id, role="human", content=question)
|
| 125 |
-
|
| 126 |
-
# Build and invoke the RAG chain
|
| 127 |
-
chain = build_rag_chain(onboarding_id, chat_id, prompt_type)
|
| 128 |
-
history = ChatHistoryManager.get_messages(chat_id)
|
| 129 |
-
result = chain.invoke({"question": question, "chat_history": history})
|
| 130 |
-
answer = result.get("answer") or result.get("output_text")
|
| 131 |
-
if not answer:
|
| 132 |
-
raise Exception("No answer returned from chain")
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
except HTTPException:
|
| 146 |
-
raise
|
| 147 |
-
except Exception as e:
|
| 148 |
-
logger.error("Error chatting onboarding_id=%s chat=%s: %s", onboarding_id, chat_id, e, exc_info=True)
|
| 149 |
-
return ChatResponse(
|
| 150 |
-
success=False,
|
| 151 |
-
answer=None,
|
| 152 |
-
error=str(e),
|
| 153 |
-
chat_id=chat_id,
|
| 154 |
-
onboarding_id=onboarding_id
|
| 155 |
-
)
|
|
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
+
from fastapi import APIRouter, HTTPException, Path, Query
|
| 4 |
|
| 5 |
from .schemas import SetupRequest, ChatRequest, SetupResponse, ChatResponse
|
| 6 |
from .utils import (
|
| 7 |
+
get_vectorstore_path,
|
| 8 |
text_splitter,
|
| 9 |
embeddings,
|
| 10 |
save_vectorstore_to_disk,
|
| 11 |
upsert_vectorstore_metadata,
|
| 12 |
+
get_vectorstore_metadata,
|
| 13 |
build_rag_chain
|
| 14 |
)
|
| 15 |
from .chat_history import ChatHistoryManager
|
|
|
|
| 17 |
|
| 18 |
router = APIRouter(prefix="/rag", tags=["rag"])
|
| 19 |
|
| 20 |
+
@router.post("/initialization/{onboarding_id}/{doc_type}", response_model=SetupResponse)
|
| 21 |
+
async def setup_rag_session(
|
| 22 |
+
onboarding_id: str = Path(..., description="Unique onboarding identifier"),
|
| 23 |
+
doc_type: str = Path(..., description="Type of document (e.g., page_speed, seo, uiux)"),
|
| 24 |
+
body: SetupRequest = ...
|
| 25 |
+
):
|
| 26 |
"""
|
| 27 |
+
Ingest documents under a specific document type and create a chat session.
|
| 28 |
+
- If vectorstore exists for onboarding_id and doc_type, skip ingestion.
|
| 29 |
- Always create a new chat_id for this session.
|
| 30 |
"""
|
| 31 |
+
vectorstore_path = get_vectorstore_path(onboarding_id, doc_type)
|
| 32 |
+
|
| 33 |
+
# Existing vectorstore
|
| 34 |
+
if os.path.isdir(os.path.join(vectorstore_path, "faiss_index")):
|
| 35 |
logger.info(
|
| 36 |
+
"Vectorstore exists for onboarding_id=%s, doc_type=%s; skipping ingestion",
|
| 37 |
+
onboarding_id, doc_type
|
| 38 |
)
|
| 39 |
+
metadata = get_vectorstore_metadata(onboarding_id, doc_type)
|
| 40 |
+
if metadata and metadata.get("chat_id"):
|
| 41 |
+
chat_id = metadata["chat_id"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
else:
|
| 43 |
+
chat_id = str(uuid.uuid4())
|
| 44 |
+
ChatHistoryManager.create_session(chat_id)
|
| 45 |
+
upsert_vectorstore_metadata(onboarding_id, doc_type, vectorstore_path, chat_id)
|
| 46 |
return SetupResponse(
|
| 47 |
success=True,
|
| 48 |
message="RAG setup completed with existing vectorstore.",
|
| 49 |
onboarding_id=onboarding_id,
|
| 50 |
+
doc_type=doc_type,
|
| 51 |
chat_id=chat_id,
|
| 52 |
+
vectorstore_path=vectorstore_path
|
| 53 |
)
|
| 54 |
|
| 55 |
+
# New ingestion
|
| 56 |
+
if not body.documents:
|
| 57 |
+
logger.error(
|
| 58 |
+
"Missing documents for onboarding_id=%s, doc_type=%s",
|
| 59 |
+
onboarding_id, doc_type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
)
|
| 61 |
+
raise HTTPException(status_code=400, detail="Please provide documents to ingest.")
|
| 62 |
+
|
| 63 |
+
# Create session and ingest
|
| 64 |
+
chat_id = str(uuid.uuid4())
|
| 65 |
+
ChatHistoryManager.create_session(chat_id)
|
| 66 |
+
all_text = "\n\n".join(body.documents)
|
| 67 |
+
text_chunks = text_splitter.split_text(all_text)
|
| 68 |
+
vs = __import__("langchain_community.vectorstores").vectorstores.FAISS.from_texts(
|
| 69 |
+
texts=text_chunks,
|
| 70 |
+
embedding=embeddings
|
| 71 |
+
)
|
| 72 |
+
vs_path = save_vectorstore_to_disk(vs, onboarding_id, doc_type)
|
| 73 |
+
upsert_vectorstore_metadata(onboarding_id, doc_type, vs_path, chat_id)
|
| 74 |
|
| 75 |
return SetupResponse(
|
| 76 |
success=True,
|
| 77 |
message="RAG setup completed.",
|
| 78 |
onboarding_id=onboarding_id,
|
| 79 |
+
doc_type=doc_type,
|
| 80 |
chat_id=chat_id,
|
| 81 |
vectorstore_path=vs_path
|
| 82 |
)
|
| 83 |
|
| 84 |
+
@router.post("/chat/{onboarding_id}/{doc_type}/{chat_id}", response_model=ChatResponse)
|
| 85 |
+
async def chat_with_user(
|
| 86 |
+
onboarding_id: str = Path(...),
|
| 87 |
+
doc_type: str = Path(...),
|
| 88 |
+
chat_id: str = Path(...),
|
| 89 |
+
prompt_type: str = Query(..., description="Prompt type, e.g., page_speed or seo"),
|
| 90 |
+
body: ChatRequest = ...
|
| 91 |
+
):
|
| 92 |
"""
|
| 93 |
+
Chat endpoint using a specific document-type vectorstore.
|
|
|
|
|
|
|
| 94 |
"""
|
| 95 |
+
vectorstore_path = get_vectorstore_path(onboarding_id, doc_type)
|
| 96 |
+
if not os.path.isdir(os.path.join(vectorstore_path, "faiss_index")):
|
| 97 |
+
raise HTTPException(status_code=400, detail="Vectorstore not found; run initialization first.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
|
|
|
| 99 |
if not ChatHistoryManager.chat_exists(chat_id):
|
| 100 |
+
raise HTTPException(status_code=404, detail=f"Chat session {chat_id} not found.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
question = body.question.strip()
|
| 103 |
+
ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
|
| 104 |
+
ChatHistoryManager.add_message(chat_id, role="human", content=question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
+
chain = build_rag_chain(onboarding_id, doc_type, chat_id, prompt_type)
|
| 107 |
+
history = ChatHistoryManager.get_messages(chat_id)
|
| 108 |
+
result = chain.invoke({"question": question, "chat_history": history})
|
| 109 |
+
answer = result.get("answer") or result.get("output_text")
|
| 110 |
+
ChatHistoryManager.add_message(chat_id, role="ai", content=answer)
|
| 111 |
|
| 112 |
+
return ChatResponse(
|
| 113 |
+
success=True,
|
| 114 |
+
answer=answer,
|
| 115 |
+
error=None,
|
| 116 |
+
chat_id=chat_id,
|
| 117 |
+
onboarding_id=onboarding_id,
|
| 118 |
+
doc_type=doc_type
|
| 119 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/rag/utils.py
CHANGED
|
@@ -4,145 +4,102 @@ from fastapi import HTTPException
|
|
| 4 |
|
| 5 |
from langchain_community.vectorstores import FAISS
|
| 6 |
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
|
| 7 |
-
from langchain.memory import ConversationBufferMemory
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
|
| 10 |
from app.page_speed.config import settings
|
| 11 |
from .db import vectorstore_meta_coll, chat_collection_name
|
| 12 |
from .embeddings import embeddings, text_splitter, get_llm
|
| 13 |
from .logging_config import logger
|
| 14 |
-
from
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
| 20 |
"""
|
| 21 |
-
|
| 22 |
-
Returns a path like './vectorstores/{onboarding_id}'.
|
| 23 |
"""
|
| 24 |
base_dir = settings.vectorstore_base_path
|
| 25 |
-
|
| 26 |
-
# os.makedirs(user_dir, exist_ok=True)
|
| 27 |
-
return user_dir
|
| 28 |
-
|
| 29 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 30 |
-
# 2. Build or Load an Existing FAISS Index for a User
|
| 31 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 32 |
-
def build_or_load_vectorstore(onboarding_id: str) -> FAISS:
|
| 33 |
-
"""
|
| 34 |
-
Attempt to load an existing FAISS index for this user.
|
| 35 |
-
If not found on disk, raise a FileNotFoundError.
|
| 36 |
-
"""
|
| 37 |
-
user_dir = get_vectorstore_path(onboarding_id)
|
| 38 |
-
faiss_index_path = os.path.join(user_dir, "faiss_index")
|
| 39 |
-
|
| 40 |
-
if not os.path.isdir(faiss_index_path):
|
| 41 |
-
raise FileNotFoundError(f"No vectorstore found at {faiss_index_path}")
|
| 42 |
-
|
| 43 |
-
# Allow loading your own index via pickle
|
| 44 |
-
return FAISS.load_local(
|
| 45 |
-
folder_path=faiss_index_path,
|
| 46 |
-
embeddings=embeddings,
|
| 47 |
-
allow_dangerous_deserialization=True
|
| 48 |
-
)
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
|
| 52 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 53 |
-
def save_vectorstore_to_disk(vectorstore: FAISS, vectorstore_name: str) -> str:
|
| 54 |
"""
|
| 55 |
-
Save
|
| 56 |
-
Returns the path to that saved folder.
|
| 57 |
"""
|
| 58 |
-
|
| 59 |
-
faiss_index_path = os.path.join(
|
| 60 |
os.makedirs(faiss_index_path, exist_ok=True)
|
| 61 |
vectorstore.save_local(folder_path=faiss_index_path)
|
| 62 |
return faiss_index_path
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
vectorstore_meta_coll.update_one(
|
| 72 |
-
{"onboarding_id": onboarding_id},
|
| 73 |
-
{"$set": {"vectorstore_path": vectorstore_path},
|
| 74 |
-
"$setOnInsert": {"chat_id": chat_id}},
|
| 75 |
upsert=True
|
| 76 |
)
|
| 77 |
|
| 78 |
-
def get_vectorstore_metadata(onboarding_id: str) -> Optional[Dict[str, Any]]:
|
| 79 |
-
"""
|
| 80 |
-
Retrieve the metadata doc (if any) for this onboarding_id.
|
| 81 |
-
"""
|
| 82 |
-
return vectorstore_meta_coll.find_one({"onboarding_id": onboarding_id})
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
"""
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
session_id=chat_id,
|
| 93 |
connection_string=settings.mongo_uri,
|
| 94 |
database_name=settings.mongo_chat_db,
|
| 95 |
collection_name=chat_collection_name,
|
| 96 |
)
|
| 97 |
-
|
| 98 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 99 |
-
# 6. Build a ConversationalRetrievalChain (RAG Chain) for onboarding_id + chat_id
|
| 100 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 101 |
-
def build_rag_chain(onboarding_id: str, chat_id: str, prompt_type: str) -> ConversationalRetrievalChain:
|
| 102 |
-
"""
|
| 103 |
-
- Loads the FAISS index for onboarding_id.
|
| 104 |
-
- Creates a retriever (k=3).
|
| 105 |
-
- Wraps MongoDBChatMessageHistory in a ConversationBufferMemory.
|
| 106 |
-
- Attaches the ChatGroq LLM + user_prompt.
|
| 107 |
-
"""
|
| 108 |
-
# 1. Load FAISS index (or 404 if not found)
|
| 109 |
-
try:
|
| 110 |
-
faiss_vs = build_or_load_vectorstore(onboarding_id)
|
| 111 |
-
except FileNotFoundError:
|
| 112 |
-
raise HTTPException(status_code=404, detail="Vectorstore not found for this onboarding_id. Call /rag/ingest first.")
|
| 113 |
-
|
| 114 |
-
retriever = faiss_vs.as_retriever(search_kwargs={"k": 5})
|
| 115 |
-
|
| 116 |
-
# 2. Instantiate a MongoDB-based chat history
|
| 117 |
-
chat_history = initialize_chat_history(chat_id)
|
| 118 |
-
|
| 119 |
-
# 3. Wrap that history in a ConversationBufferMemory, so the chain gets a valid "Memory" object
|
| 120 |
memory = ConversationBufferMemory(
|
| 121 |
-
memory_key="chat_history",
|
| 122 |
-
chat_history=chat_history
|
| 123 |
)
|
| 124 |
|
| 125 |
-
# 4. Get the LLM
|
| 126 |
llm = get_llm()
|
| 127 |
-
|
| 128 |
if prompt_type == "page_speed":
|
| 129 |
-
# Use the specific prompt for Page Speed Insights
|
| 130 |
user_prompt = page_speed_prompt
|
| 131 |
elif prompt_type == "seo":
|
| 132 |
-
# Use the specific prompt for SEO
|
| 133 |
user_prompt = seo_prompt
|
| 134 |
else:
|
| 135 |
-
# Default to the user prompt if no specific type is provided
|
| 136 |
user_prompt = default_user_prompt
|
| 137 |
|
| 138 |
-
|
| 139 |
-
chain = ConversationalRetrievalChain.from_llm(
|
| 140 |
llm=llm,
|
| 141 |
retriever=retriever,
|
| 142 |
-
memory=memory,
|
| 143 |
return_source_documents=False,
|
| 144 |
chain_type="stuff",
|
| 145 |
-
combine_docs_chain_kwargs={"prompt": user_prompt},
|
| 146 |
-
verbose=False
|
| 147 |
)
|
| 148 |
-
return chain
|
|
|
|
| 4 |
|
| 5 |
from langchain_community.vectorstores import FAISS
|
| 6 |
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
|
| 7 |
+
from langchain.memory import ConversationBufferMemory
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
|
| 10 |
from app.page_speed.config import settings
|
| 11 |
from .db import vectorstore_meta_coll, chat_collection_name
|
| 12 |
from .embeddings import embeddings, text_splitter, get_llm
|
| 13 |
from .logging_config import logger
|
| 14 |
+
from .prompt_library import (
|
| 15 |
+
default_user_prompt,
|
| 16 |
+
page_speed_prompt,
|
| 17 |
+
seo_prompt
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
# 1. Path with doc_type
|
| 21 |
+
def get_vectorstore_path(onboarding_id: str, doc_type: str) -> str:
    """Return the on-disk folder for this (onboarding_id, doc_type) pair.

    Layout: '{vectorstore_base_path}/{onboarding_id}/{doc_type}'.
    """
    return os.path.join(settings.vectorstore_base_path, onboarding_id, doc_type)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
# 2. Save to disk under doc_type
|
| 29 |
+
def save_vectorstore_to_disk(vectorstore: FAISS, onboarding_id: str, doc_type: str) -> str:
    """Persist a FAISS index under '{base}/{onboarding_id}/{doc_type}/faiss_index'.

    Creates the folder if needed and returns the faiss_index path.
    """
    target = os.path.join(get_vectorstore_path(onboarding_id, doc_type), "faiss_index")
    os.makedirs(target, exist_ok=True)
    vectorstore.save_local(folder_path=target)
    return target
|
| 38 |
|
| 39 |
+
# 3. Metadata now includes doc_type
|
| 40 |
+
def upsert_vectorstore_metadata(
    onboarding_id: str,
    doc_type: str,
    vectorstore_path: str,
    chat_id: str
) -> None:
    """Insert or update the metadata record keyed by (onboarding_id, doc_type)."""
    selector = {"onboarding_id": onboarding_id, "doc_type": doc_type}
    changes = {"$set": {"vectorstore_path": vectorstore_path, "chat_id": chat_id}}
    vectorstore_meta_coll.update_one(selector, changes, upsert=True)
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
+
def get_vectorstore_metadata(
    onboarding_id: str,
    doc_type: str
) -> Optional[Dict[str, Any]]:
    """Fetch the metadata document for (onboarding_id, doc_type), or None if absent."""
    selector = {"onboarding_id": onboarding_id, "doc_type": doc_type}
    return vectorstore_meta_coll.find_one(selector)
|
| 58 |
+
|
| 59 |
+
# 4. Build chain now takes doc_type
|
| 60 |
+
|
| 61 |
+
def build_rag_chain(
    onboarding_id: str,
    doc_type: str,
    chat_id: str,
    prompt_type: str
) -> ConversationalRetrievalChain:
    """
    Build a ConversationalRetrievalChain over the (onboarding_id, doc_type) index.

    - Loads the FAISS index from disk (HTTP 404 if it was never ingested).
    - Creates a retriever (k=5).
    - Wraps MongoDB-backed chat history in a ConversationBufferMemory.
    - Selects the prompt matching prompt_type ('page_speed', 'seo', else default).

    Raises:
        HTTPException(404): no FAISS index exists for this pair.
    """
    # Load index — fail with a clear API error instead of FAISS's internal
    # RuntimeError when the index folder is missing.
    vs_path = get_vectorstore_path(onboarding_id, doc_type)
    faiss_index_path = os.path.join(vs_path, "faiss_index")
    if not os.path.isdir(faiss_index_path):
        raise HTTPException(
            status_code=404,
            detail="Vectorstore not found for this onboarding_id/doc_type. Run initialization first."
        )
    faiss_vs = FAISS.load_local(
        folder_path=faiss_index_path,
        embeddings=embeddings,
        allow_dangerous_deserialization=True
    )
    retriever = faiss_vs.as_retriever(search_kwargs={"k": 5})

    # History & memory
    chat_history = MongoDBChatMessageHistory(
        session_id=chat_id,
        connection_string=settings.mongo_uri,
        database_name=settings.mongo_chat_db,
        collection_name=chat_collection_name,
    )
    # ConversationBufferMemory's history field is `chat_memory`; passing the
    # unknown keyword `chat_history=` raises a pydantic validation error at
    # runtime (extra fields are forbidden on the model).
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        chat_memory=chat_history
    )

    llm = get_llm()
    # Choose prompt
    if prompt_type == "page_speed":
        user_prompt = page_speed_prompt
    elif prompt_type == "seo":
        user_prompt = seo_prompt
    else:
        user_prompt = default_user_prompt

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        return_source_documents=False,
        chain_type="stuff",
        combine_docs_chain_kwargs={"prompt": user_prompt},
        verbose=False
    )
|
|
|