# anyonehomep1mane
# Initial Changes
# 5637ddb
import warnings
warnings.filterwarnings(action='ignore')
import torch
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from common.rag.document_loader import fetch_document_chunks
from dotenv import load_dotenv
load_dotenv()
def fetch_vectorstore_retriever():
    """
    Build a FAISS similarity retriever over the project's document chunks.

    Steps performed on every call:
    - Instantiate sentence-transformers/all-MiniLM-L6-v2 embeddings, placed on
      CUDA when torch reports it as available and on the CPU otherwise.
    - Embed the chunks returned by fetch_document_chunks() into a new FAISS
      vector store.
    - Wrap that store in a retriever using search_type="similarity" and k=10.

    Returns
    -------
    retriever
        The object produced by ``vectorstore.as_retriever(...)``; callers in
        this module query it through ``.invoke(query)``.

    Notes
    -----
    - Nothing is cached or persisted: the embedding model is loaded and the
      index is rebuilt from scratch each time this function runs, which can be
      slow for large document collections.
    - Embeddings are requested with normalize_embeddings=True.
    """
    # Pick the compute device once so the embedding config below stays readable.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
    )

    # Chunks are loaded after the embedding model, same order as before.
    chunks = fetch_document_chunks()
    index = FAISS.from_documents(documents=chunks, embedding=embedding_model)

    return index.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 10},
    )
def fetch_relevant_document(topic="None"):
    """
    Retrieve document chunks relevant to a graphological analysis of a topic/trait.

    Builds a structured query around ``topic``, runs it through the retriever
    returned by fetch_vectorstore_retriever() (similarity search, k=10), and
    concatenates the retrieved chunks into a single context string.

    Parameters
    ----------
    topic : str, default="None"
        Personality trait, psychological characteristic, writing style aspect or
        any other topic for which handwriting analysis information is requested.
        Examples: "ambition", "emotional stability", "aggressiveness".
        The string "None" (the default), the ``None`` object and blank strings
        all mean "no topic supplied".

    Returns
    -------
    str
        The retrieved chunks joined by blank lines, each prefixed with
        "[Document N]" (N starting at 1). An empty string is returned when no
        topic is supplied or when the retriever yields no documents.

    Notes
    -----
    - The vector store is rebuilt on every call that reaches the retriever, so
      the no-topic check is done first to avoid that cost for a useless query.
    - The returned context is meant to be passed directly into a RAG prompt.
    """
    # Honour the documented contract: without a real topic there is nothing to
    # search for. Previously the literal text "None" was embedded and searched,
    # returning unrelated chunks after building the whole index.
    if topic is None:
        return ""
    topic_text = str(topic).strip()
    if not topic_text or topic_text == "None":
        return ""

    retriever = fetch_vectorstore_retriever()

    # The query is deliberately verbose and structured; the topic is inserted
    # exactly as supplied by the caller.
    query = (
        f"Handwriting sample analysis for: {topic}\n"
        "Extract and summarize: \n"
        "- Observed writing style characteristics (slant, pressure, size, speed, spacing, margins, baseline, letter forms, connections, etc.)\n"
        "- Graphological interpretations of personality traits linked to those features\n"
        "- Overall psychological or personality impression"
    )

    docs = retriever.invoke(query)

    # An empty result list naturally joins to "".
    return "\n\n".join(
        f"[Document {i+1}]\n{doc.page_content}\n" for i, doc in enumerate(docs)
    )