Refactored vectorstore path to include document type (doc_type)
Browse files- app/page_speed/services.py +1 -5
- app/rag/routes.py +72 -108
- app/rag/utils.py +57 -100
app/page_speed/services.py
CHANGED
|
@@ -126,11 +126,7 @@ class PageSpeedService:
|
|
| 126 |
"""
|
| 127 |
logger.debug("Building Gemini analysis prompt from PageSpeed data.")
|
| 128 |
return f"""
|
| 129 |
-
|
| 130 |
-
You are an **Expert Web Performance Optimization Consultant**. The following JSON `{pagespeed_data}` contains exactly these keys (all required):
|
| 131 |
-
=======
|
| 132 |
-
You are an **Expert Web Performance Optimization Consultant**. The following JSON `{{pagespeed_data}}` includes detailed website performance metrics from Google PageSpeed Insights.
|
| 133 |
-
>>>>>>> 574c6ac (Update endpoints):app/page_speed/services.py
|
| 134 |
|
| 135 |
Your task is to analyze this data and generate a human-friendly performance **report in plain English**. The report will be read by a **non-technical business owner**, so keep it understandable while explaining technical concepts briefly when necessary.
|
| 136 |
|
|
|
|
| 126 |
"""
|
| 127 |
logger.debug("Building Gemini analysis prompt from PageSpeed data.")
|
| 128 |
return f"""
|
| 129 |
+
You are an **Expert Web Performance Optimization Consultant**. The following JSON page speed data includes detailed website performance metrics from Google PageSpeed Insights.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
Your task is to analyze this data and generate a human-friendly performance **report in plain English**. The report will be read by a **non-technical business owner**, so keep it understandable while explaining technical concepts briefly when necessary.
|
| 132 |
|
app/rag/routes.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
-
from fastapi import APIRouter, HTTPException
|
| 4 |
|
| 5 |
from .schemas import SetupRequest, ChatRequest, SetupResponse, ChatResponse
|
| 6 |
from .utils import (
|
| 7 |
-
|
| 8 |
text_splitter,
|
| 9 |
embeddings,
|
| 10 |
save_vectorstore_to_disk,
|
| 11 |
upsert_vectorstore_metadata,
|
| 12 |
-
|
| 13 |
build_rag_chain
|
| 14 |
)
|
| 15 |
from .chat_history import ChatHistoryManager
|
|
@@ -17,139 +17,103 @@ from .logging_config import logger
|
|
| 17 |
|
| 18 |
router = APIRouter(prefix="/rag", tags=["rag"])
|
| 19 |
|
| 20 |
-
@router.post("/initialization/{onboarding_id}", response_model=SetupResponse)
|
| 21 |
-
async def setup_rag_session(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
"""
|
| 23 |
-
|
| 24 |
-
- If vectorstore exists for
|
| 25 |
- Always create a new chat_id for this session.
|
| 26 |
"""
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
| 30 |
logger.info(
|
| 31 |
-
"Vectorstore exists for onboarding_id=%s
|
| 32 |
-
onboarding_id,
|
| 33 |
)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
if chat_session:
|
| 39 |
-
chat_id = str(chat_session["chat_id"])
|
| 40 |
-
logger.info("Using existing chat session id=%s for onboarding_id=%s", chat_id, onboarding_id)
|
| 41 |
else:
|
| 42 |
-
|
| 43 |
-
|
|
|
|
| 44 |
return SetupResponse(
|
| 45 |
success=True,
|
| 46 |
message="RAG setup completed with existing vectorstore.",
|
| 47 |
onboarding_id=onboarding_id,
|
|
|
|
| 48 |
chat_id=chat_id,
|
| 49 |
-
vectorstore_path=
|
| 50 |
)
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
raise HTTPException(
|
| 58 |
-
status_code=400,
|
| 59 |
-
detail="Vectorstore does not exist; please provide documents to ingest."
|
| 60 |
-
)
|
| 61 |
-
|
| 62 |
-
# Create new chat session
|
| 63 |
-
chat_id = str(uuid.uuid4())
|
| 64 |
-
ChatHistoryManager.create_session(chat_id)
|
| 65 |
-
logger.info(
|
| 66 |
-
"Created new chat session %s for onboarding_id=%s",
|
| 67 |
-
chat_id, onboarding_id
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
-
# Ingest new vectorstore
|
| 71 |
-
all_text = "\n\n".join(body.documents)
|
| 72 |
-
text_chunks = text_splitter.split_text(all_text)
|
| 73 |
-
logger.info("Split into %d chunks for ingestion", len(text_chunks))
|
| 74 |
-
from langchain.vectorstores import FAISS as _FAISS
|
| 75 |
-
vs = _FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
| 76 |
-
vs_path = save_vectorstore_to_disk(vs, onboarding_id)
|
| 77 |
-
logger.info("Saved FAISS index to %s", vs_path)
|
| 78 |
-
upsert_vectorstore_metadata(onboarding_id, vs_path, chat_id)
|
| 79 |
-
logger.info(
|
| 80 |
-
"Upserted vectorstore metadata for onboarding_id=%s", onboarding_id
|
| 81 |
)
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
return SetupResponse(
|
| 85 |
success=True,
|
| 86 |
message="RAG setup completed.",
|
| 87 |
onboarding_id=onboarding_id,
|
|
|
|
| 88 |
chat_id=chat_id,
|
| 89 |
vectorstore_path=vs_path
|
| 90 |
)
|
| 91 |
|
| 92 |
-
@router.post("/chat/{onboarding_id}/{chat_id}", response_model=ChatResponse)
|
| 93 |
-
async def chat_with_user(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
"""
|
| 95 |
-
Chat endpoint
|
| 96 |
-
- Validates that the vectorstore exists for onboarding_id.
|
| 97 |
-
- Validates that the chat session exists.
|
| 98 |
"""
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
logger.error("Vectorstore not found for onboarding_id=%s", onboarding_id)
|
| 103 |
-
raise HTTPException(
|
| 104 |
-
status_code=400,
|
| 105 |
-
detail="Vectorstore not found for this onboarding_id. Please run /setup first."
|
| 106 |
-
)
|
| 107 |
|
| 108 |
-
# 1. Ensure chat session exists
|
| 109 |
if not ChatHistoryManager.chat_exists(chat_id):
|
| 110 |
-
|
| 111 |
-
raise HTTPException(
|
| 112 |
-
status_code=404,
|
| 113 |
-
detail=f"Chat session {chat_id} does not exist."
|
| 114 |
-
)
|
| 115 |
|
| 116 |
question = body.question.strip()
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
try:
|
| 120 |
-
# Summarize long histories
|
| 121 |
-
ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
|
| 122 |
-
|
| 123 |
-
# Record the user message
|
| 124 |
-
ChatHistoryManager.add_message(chat_id, role="human", content=question)
|
| 125 |
-
|
| 126 |
-
# Build and invoke the RAG chain
|
| 127 |
-
chain = build_rag_chain(onboarding_id, chat_id, prompt_type)
|
| 128 |
-
history = ChatHistoryManager.get_messages(chat_id)
|
| 129 |
-
result = chain.invoke({"question": question, "chat_history": history})
|
| 130 |
-
answer = result.get("answer") or result.get("output_text")
|
| 131 |
-
if not answer:
|
| 132 |
-
raise Exception("No answer returned from chain")
|
| 133 |
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
except HTTPException:
|
| 146 |
-
raise
|
| 147 |
-
except Exception as e:
|
| 148 |
-
logger.error("Error chatting onboarding_id=%s chat=%s: %s", onboarding_id, chat_id, e, exc_info=True)
|
| 149 |
-
return ChatResponse(
|
| 150 |
-
success=False,
|
| 151 |
-
answer=None,
|
| 152 |
-
error=str(e),
|
| 153 |
-
chat_id=chat_id,
|
| 154 |
-
onboarding_id=onboarding_id
|
| 155 |
-
)
|
|
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
+
from fastapi import APIRouter, HTTPException, Path, Query
|
| 4 |
|
| 5 |
from .schemas import SetupRequest, ChatRequest, SetupResponse, ChatResponse
|
| 6 |
from .utils import (
|
| 7 |
+
get_vectorstore_path,
|
| 8 |
text_splitter,
|
| 9 |
embeddings,
|
| 10 |
save_vectorstore_to_disk,
|
| 11 |
upsert_vectorstore_metadata,
|
| 12 |
+
get_vectorstore_metadata,
|
| 13 |
build_rag_chain
|
| 14 |
)
|
| 15 |
from .chat_history import ChatHistoryManager
|
|
|
|
| 17 |
|
| 18 |
router = APIRouter(prefix="/rag", tags=["rag"])
|
| 19 |
|
| 20 |
+
@router.post("/initialization/{onboarding_id}/{doc_type}", response_model=SetupResponse)
|
| 21 |
+
async def setup_rag_session(
|
| 22 |
+
onboarding_id: str = Path(..., description="Unique onboarding identifier"),
|
| 23 |
+
doc_type: str = Path(..., description="Type of document (e.g., page_speed, seo, uiux)"),
|
| 24 |
+
body: SetupRequest = ...
|
| 25 |
+
):
|
| 26 |
"""
|
| 27 |
+
Ingest documents under a specific document type and create a chat session.
|
| 28 |
+
- If vectorstore exists for onboarding_id and doc_type, skip ingestion.
|
| 29 |
- Always create a new chat_id for this session.
|
| 30 |
"""
|
| 31 |
+
vectorstore_path = get_vectorstore_path(onboarding_id, doc_type)
|
| 32 |
+
|
| 33 |
+
# Existing vectorstore
|
| 34 |
+
if os.path.isdir(os.path.join(vectorstore_path, "faiss_index")):
|
| 35 |
logger.info(
|
| 36 |
+
"Vectorstore exists for onboarding_id=%s, doc_type=%s; skipping ingestion",
|
| 37 |
+
onboarding_id, doc_type
|
| 38 |
)
|
| 39 |
+
metadata = get_vectorstore_metadata(onboarding_id, doc_type)
|
| 40 |
+
if metadata and metadata.get("chat_id"):
|
| 41 |
+
chat_id = metadata["chat_id"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
else:
|
| 43 |
+
chat_id = str(uuid.uuid4())
|
| 44 |
+
ChatHistoryManager.create_session(chat_id)
|
| 45 |
+
upsert_vectorstore_metadata(onboarding_id, doc_type, vectorstore_path, chat_id)
|
| 46 |
return SetupResponse(
|
| 47 |
success=True,
|
| 48 |
message="RAG setup completed with existing vectorstore.",
|
| 49 |
onboarding_id=onboarding_id,
|
| 50 |
+
doc_type=doc_type,
|
| 51 |
chat_id=chat_id,
|
| 52 |
+
vectorstore_path=vectorstore_path
|
| 53 |
)
|
| 54 |
|
| 55 |
+
# New ingestion
|
| 56 |
+
if not body.documents:
|
| 57 |
+
logger.error(
|
| 58 |
+
"Missing documents for onboarding_id=%s, doc_type=%s",
|
| 59 |
+
onboarding_id, doc_type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
)
|
| 61 |
+
raise HTTPException(status_code=400, detail="Please provide documents to ingest.")
|
| 62 |
+
|
| 63 |
+
# Create session and ingest
|
| 64 |
+
chat_id = str(uuid.uuid4())
|
| 65 |
+
ChatHistoryManager.create_session(chat_id)
|
| 66 |
+
all_text = "\n\n".join(body.documents)
|
| 67 |
+
text_chunks = text_splitter.split_text(all_text)
|
| 68 |
+
vs = __import__("langchain_community.vectorstores").vectorstores.FAISS.from_texts(
|
| 69 |
+
texts=text_chunks,
|
| 70 |
+
embedding=embeddings
|
| 71 |
+
)
|
| 72 |
+
vs_path = save_vectorstore_to_disk(vs, onboarding_id, doc_type)
|
| 73 |
+
upsert_vectorstore_metadata(onboarding_id, doc_type, vs_path, chat_id)
|
| 74 |
|
| 75 |
return SetupResponse(
|
| 76 |
success=True,
|
| 77 |
message="RAG setup completed.",
|
| 78 |
onboarding_id=onboarding_id,
|
| 79 |
+
doc_type=doc_type,
|
| 80 |
chat_id=chat_id,
|
| 81 |
vectorstore_path=vs_path
|
| 82 |
)
|
| 83 |
|
| 84 |
+
@router.post("/chat/{onboarding_id}/{doc_type}/{chat_id}", response_model=ChatResponse)
|
| 85 |
+
async def chat_with_user(
|
| 86 |
+
onboarding_id: str = Path(...),
|
| 87 |
+
doc_type: str = Path(...),
|
| 88 |
+
chat_id: str = Path(...),
|
| 89 |
+
prompt_type: str = Query(..., description="Prompt type, e.g., page_speed or seo"),
|
| 90 |
+
body: ChatRequest = ...
|
| 91 |
+
):
|
| 92 |
"""
|
| 93 |
+
Chat endpoint using a specific document-type vectorstore.
|
|
|
|
|
|
|
| 94 |
"""
|
| 95 |
+
vectorstore_path = get_vectorstore_path(onboarding_id, doc_type)
|
| 96 |
+
if not os.path.isdir(os.path.join(vectorstore_path, "faiss_index")):
|
| 97 |
+
raise HTTPException(status_code=400, detail="Vectorstore not found; run initialization first.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
|
|
|
| 99 |
if not ChatHistoryManager.chat_exists(chat_id):
|
| 100 |
+
raise HTTPException(status_code=404, detail=f"Chat session {chat_id} not found.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
question = body.question.strip()
|
| 103 |
+
ChatHistoryManager.summarize_if_needed(chat_id, threshold=10)
|
| 104 |
+
ChatHistoryManager.add_message(chat_id, role="human", content=question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
+
chain = build_rag_chain(onboarding_id, doc_type, chat_id, prompt_type)
|
| 107 |
+
history = ChatHistoryManager.get_messages(chat_id)
|
| 108 |
+
result = chain.invoke({"question": question, "chat_history": history})
|
| 109 |
+
answer = result.get("answer") or result.get("output_text")
|
| 110 |
+
ChatHistoryManager.add_message(chat_id, role="ai", content=answer)
|
| 111 |
|
| 112 |
+
return ChatResponse(
|
| 113 |
+
success=True,
|
| 114 |
+
answer=answer,
|
| 115 |
+
error=None,
|
| 116 |
+
chat_id=chat_id,
|
| 117 |
+
onboarding_id=onboarding_id,
|
| 118 |
+
doc_type=doc_type
|
| 119 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/rag/utils.py
CHANGED
|
@@ -4,145 +4,102 @@ from fastapi import HTTPException
|
|
| 4 |
|
| 5 |
from langchain_community.vectorstores import FAISS
|
| 6 |
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
|
| 7 |
-
from langchain.memory import ConversationBufferMemory
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
|
| 10 |
from app.page_speed.config import settings
|
| 11 |
from .db import vectorstore_meta_coll, chat_collection_name
|
| 12 |
from .embeddings import embeddings, text_splitter, get_llm
|
| 13 |
from .logging_config import logger
|
| 14 |
-
from
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
| 20 |
"""
|
| 21 |
-
|
| 22 |
-
Returns a path like './vectorstores/{onboarding_id}'.
|
| 23 |
"""
|
| 24 |
base_dir = settings.vectorstore_base_path
|
| 25 |
-
|
| 26 |
-
# os.makedirs(user_dir, exist_ok=True)
|
| 27 |
-
return user_dir
|
| 28 |
-
|
| 29 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 30 |
-
# 2. Build or Load an Existing FAISS Index for a User
|
| 31 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 32 |
-
def build_or_load_vectorstore(onboarding_id: str) -> FAISS:
|
| 33 |
-
"""
|
| 34 |
-
Attempt to load an existing FAISS index for this user.
|
| 35 |
-
If not found on disk, raise a FileNotFoundError.
|
| 36 |
-
"""
|
| 37 |
-
user_dir = get_vectorstore_path(onboarding_id)
|
| 38 |
-
faiss_index_path = os.path.join(user_dir, "faiss_index")
|
| 39 |
-
|
| 40 |
-
if not os.path.isdir(faiss_index_path):
|
| 41 |
-
raise FileNotFoundError(f"No vectorstore found at {faiss_index_path}")
|
| 42 |
-
|
| 43 |
-
# Allow loading your own index via pickle
|
| 44 |
-
return FAISS.load_local(
|
| 45 |
-
folder_path=faiss_index_path,
|
| 46 |
-
embeddings=embeddings,
|
| 47 |
-
allow_dangerous_deserialization=True
|
| 48 |
-
)
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
|
| 52 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 53 |
-
def save_vectorstore_to_disk(vectorstore: FAISS, vectorstore_name: str) -> str:
|
| 54 |
"""
|
| 55 |
-
Save
|
| 56 |
-
Returns the path to that saved folder.
|
| 57 |
"""
|
| 58 |
-
|
| 59 |
-
faiss_index_path = os.path.join(
|
| 60 |
os.makedirs(faiss_index_path, exist_ok=True)
|
| 61 |
vectorstore.save_local(folder_path=faiss_index_path)
|
| 62 |
return faiss_index_path
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
vectorstore_meta_coll.update_one(
|
| 72 |
-
{"onboarding_id": onboarding_id},
|
| 73 |
-
{"$set": {"vectorstore_path": vectorstore_path},
|
| 74 |
-
"$setOnInsert": {"chat_id": chat_id}},
|
| 75 |
upsert=True
|
| 76 |
)
|
| 77 |
|
| 78 |
-
def get_vectorstore_metadata(onboarding_id: str) -> Optional[Dict[str, Any]]:
|
| 79 |
-
"""
|
| 80 |
-
Retrieve the metadata doc (if any) for this onboarding_id.
|
| 81 |
-
"""
|
| 82 |
-
return vectorstore_meta_coll.find_one({"onboarding_id": onboarding_id})
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
"""
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
session_id=chat_id,
|
| 93 |
connection_string=settings.mongo_uri,
|
| 94 |
database_name=settings.mongo_chat_db,
|
| 95 |
collection_name=chat_collection_name,
|
| 96 |
)
|
| 97 |
-
|
| 98 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 99 |
-
# 6. Build a ConversationalRetrievalChain (RAG Chain) for onboarding_id + chat_id
|
| 100 |
-
# ──────────────────────────────────────────────────────────────────────────────
|
| 101 |
-
def build_rag_chain(onboarding_id: str, chat_id: str, prompt_type: str) -> ConversationalRetrievalChain:
|
| 102 |
-
"""
|
| 103 |
-
- Loads the FAISS index for onboarding_id.
|
| 104 |
-
- Creates a retriever (k=3).
|
| 105 |
-
- Wraps MongoDBChatMessageHistory in a ConversationBufferMemory.
|
| 106 |
-
- Attaches the ChatGroq LLM + user_prompt.
|
| 107 |
-
"""
|
| 108 |
-
# 1. Load FAISS index (or 404 if not found)
|
| 109 |
-
try:
|
| 110 |
-
faiss_vs = build_or_load_vectorstore(onboarding_id)
|
| 111 |
-
except FileNotFoundError:
|
| 112 |
-
raise HTTPException(status_code=404, detail="Vectorstore not found for this onboarding_id. Call /rag/ingest first.")
|
| 113 |
-
|
| 114 |
-
retriever = faiss_vs.as_retriever(search_kwargs={"k": 5})
|
| 115 |
-
|
| 116 |
-
# 2. Instantiate a MongoDB-based chat history
|
| 117 |
-
chat_history = initialize_chat_history(chat_id)
|
| 118 |
-
|
| 119 |
-
# 3. Wrap that history in a ConversationBufferMemory, so the chain gets a valid "Memory" object
|
| 120 |
memory = ConversationBufferMemory(
|
| 121 |
-
memory_key="chat_history",
|
| 122 |
-
chat_history=chat_history
|
| 123 |
)
|
| 124 |
|
| 125 |
-
# 4. Get the LLM
|
| 126 |
llm = get_llm()
|
| 127 |
-
|
| 128 |
if prompt_type == "page_speed":
|
| 129 |
-
# Use the specific prompt for Page Speed Insights
|
| 130 |
user_prompt = page_speed_prompt
|
| 131 |
elif prompt_type == "seo":
|
| 132 |
-
# Use the specific prompt for SEO
|
| 133 |
user_prompt = seo_prompt
|
| 134 |
else:
|
| 135 |
-
# Default to the user prompt if no specific type is provided
|
| 136 |
user_prompt = default_user_prompt
|
| 137 |
|
| 138 |
-
|
| 139 |
-
chain = ConversationalRetrievalChain.from_llm(
|
| 140 |
llm=llm,
|
| 141 |
retriever=retriever,
|
| 142 |
-
memory=memory,
|
| 143 |
return_source_documents=False,
|
| 144 |
chain_type="stuff",
|
| 145 |
-
combine_docs_chain_kwargs={"prompt": user_prompt},
|
| 146 |
-
verbose=False
|
| 147 |
)
|
| 148 |
-
return chain
|
|
|
|
| 4 |
|
| 5 |
from langchain_community.vectorstores import FAISS
|
| 6 |
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
|
| 7 |
+
from langchain.memory import ConversationBufferMemory
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
|
| 10 |
from app.page_speed.config import settings
|
| 11 |
from .db import vectorstore_meta_coll, chat_collection_name
|
| 12 |
from .embeddings import embeddings, text_splitter, get_llm
|
| 13 |
from .logging_config import logger
|
| 14 |
+
from .prompt_library import (
|
| 15 |
+
default_user_prompt,
|
| 16 |
+
page_speed_prompt,
|
| 17 |
+
seo_prompt
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
# 1. Path with doc_type
|
| 21 |
+
def get_vectorstore_path(onboarding_id: str, doc_type: str) -> str:
    """Return the on-disk folder for this (onboarding_id, doc_type) pair.

    Layout: '{vectorstore_base_path}/{onboarding_id}/{doc_type}'.
    """
    return os.path.join(settings.vectorstore_base_path, onboarding_id, doc_type)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
# 2. Save to disk under doc_type
|
| 29 |
+
def save_vectorstore_to_disk(vectorstore: FAISS, onboarding_id: str, doc_type: str) -> str:
    """Persist a FAISS index under '{base}/{onboarding_id}/{doc_type}/faiss_index'.

    Creates the folder if needed and returns the faiss_index path.
    """
    target = os.path.join(get_vectorstore_path(onboarding_id, doc_type), "faiss_index")
    os.makedirs(target, exist_ok=True)
    vectorstore.save_local(folder_path=target)
    return target
|
| 38 |
|
| 39 |
+
# 3. Metadata now includes doc_type
|
| 40 |
+
def upsert_vectorstore_metadata(
    onboarding_id: str,
    doc_type: str,
    vectorstore_path: str,
    chat_id: str
) -> None:
    """Insert or update the metadata record keyed by (onboarding_id, doc_type)."""
    selector = {"onboarding_id": onboarding_id, "doc_type": doc_type}
    changes = {"$set": {"vectorstore_path": vectorstore_path, "chat_id": chat_id}}
    vectorstore_meta_coll.update_one(selector, changes, upsert=True)
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
+
def get_vectorstore_metadata(
    onboarding_id: str,
    doc_type: str
) -> Optional[Dict[str, Any]]:
    """Fetch the metadata document for (onboarding_id, doc_type), or None if absent."""
    selector = {"onboarding_id": onboarding_id, "doc_type": doc_type}
    return vectorstore_meta_coll.find_one(selector)
|
| 58 |
+
|
| 59 |
+
# 4. Build chain now takes doc_type
|
| 60 |
+
|
| 61 |
+
def build_rag_chain(
    onboarding_id: str,
    doc_type: str,
    chat_id: str,
    prompt_type: str
) -> ConversationalRetrievalChain:
    """
    Build a ConversationalRetrievalChain over the (onboarding_id, doc_type) index.

    - Loads the FAISS index from disk (HTTP 404 if it was never ingested).
    - Creates a retriever (k=5).
    - Wraps MongoDB-backed chat history in a ConversationBufferMemory.
    - Selects the prompt matching prompt_type ('page_speed', 'seo', else default).

    Raises:
        HTTPException(404): no FAISS index exists for this pair.
    """
    # Load index — fail with a clear API error instead of FAISS's internal
    # RuntimeError when the index folder is missing.
    vs_path = get_vectorstore_path(onboarding_id, doc_type)
    faiss_index_path = os.path.join(vs_path, "faiss_index")
    if not os.path.isdir(faiss_index_path):
        raise HTTPException(
            status_code=404,
            detail="Vectorstore not found for this onboarding_id/doc_type. Run initialization first."
        )
    faiss_vs = FAISS.load_local(
        folder_path=faiss_index_path,
        embeddings=embeddings,
        allow_dangerous_deserialization=True
    )
    retriever = faiss_vs.as_retriever(search_kwargs={"k": 5})

    # History & memory
    chat_history = MongoDBChatMessageHistory(
        session_id=chat_id,
        connection_string=settings.mongo_uri,
        database_name=settings.mongo_chat_db,
        collection_name=chat_collection_name,
    )
    # ConversationBufferMemory's history field is `chat_memory`; passing the
    # unknown keyword `chat_history=` raises a pydantic validation error at
    # runtime (extra fields are forbidden on the model).
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        chat_memory=chat_history
    )

    llm = get_llm()
    # Choose prompt
    if prompt_type == "page_speed":
        user_prompt = page_speed_prompt
    elif prompt_type == "seo":
        user_prompt = seo_prompt
    else:
        user_prompt = default_user_prompt

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        return_source_documents=False,
        chain_type="stuff",
        combine_docs_chain_kwargs={"prompt": user_prompt},
        verbose=False
    )
|
|
|