| import warnings | |
| warnings.filterwarnings(action='ignore') | |
| import torch | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from common.rag.document_loader import fetch_document_chunks | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| def fetch_vectorstore_retriever(): | |
| """ | |
| Create and return a FAISS-based retriever for graphology/handwriting analysis documents. | |
| This function: | |
| - Loads sentence-transformers/all-MiniLM-L6-v2 embeddings (GPU if available) | |
| - Builds a FAISS vector store from document chunks obtained via fetch_document_chunks() | |
| - Returns a similarity search retriever configured to return top 10 most relevant chunks | |
| Returns | |
| ------- | |
| langchain_core.retrievers.BaseRetriever | |
| Configured FAISS retriever ready to be used with .invoke() or .get_relevant_documents() | |
| Notes | |
| ----- | |
| - The vector store is **recreated from scratch every time** this function is called. | |
| - This can be slow on first run or when document collection is large. | |
| - Consider caching/persisting the vectorstore in production for better performance. | |
| - Uses normalize_embeddings=True → cosine similarity is used internally. | |
| """ | |
| embeddings = HuggingFaceEmbeddings( | |
| model_name="sentence-transformers/all-MiniLM-L6-v2", | |
| model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}, | |
| encode_kwargs={'normalize_embeddings': True} | |
| ) | |
| vectorstore = FAISS.from_documents( | |
| documents=fetch_document_chunks(), | |
| embedding=embeddings | |
| ) | |
| retriever = vectorstore.as_retriever( | |
| search_type="similarity", | |
| search_kwargs={"k": 10} | |
| ) | |
| return retriever | |
| def fetch_relevant_document(topic="None"): | |
| """ | |
| Retrieve relevant document chunks for graphological analysis of a specific topic/trait. | |
| Constructs a detailed, structured query optimized for finding handwriting analysis content, | |
| then retrieves the top 10 most similar document chunks from the FAISS vector store. | |
| Parameters | |
| ---------- | |
| topic : str, default="None" | |
| Personality trait, psychological characteristic, writing style aspect or any topic | |
| for which handwriting analysis information is requested. | |
| Examples: "ambition", "emotional stability", "aggressiveness", "introversion" | |
| Returns | |
| ------- | |
| str | |
| Concatenated string containing up to 10 relevant document chunks, each prefixed | |
| with "[Document N]" for clear identification in the RAG context. | |
| Returns empty context string if topic is "None" or no relevant chunks are found. | |
| Notes | |
| ----- | |
| - The query is intentionally very specific and structured to improve retrieval quality | |
| for handwriting/graphology related content. | |
| - Uses similarity (cosine) search with k=10 (top 10 results). | |
| - The returned context is meant to be directly passed into a RAG prompt for LLM analysis. | |
| """ | |
| retriever = fetch_vectorstore_retriever() | |
| query = ( | |
| f"Handwriting sample analysis for: {topic}\n" | |
| "Extract and summarize: \n" | |
| "- Observed writing style characteristics (slant, pressure, size, speed, spacing, margins, baseline, letter forms, connections, etc.)\n" | |
| "- Graphological interpretations of personality traits linked to those features\n" | |
| "- Overall psychological or personality impression" | |
| ) | |
| docs = retriever.invoke(query) | |
| context = "\n\n".join(f"[Document {i+1}]\n{doc.page_content}\n" for i, doc in enumerate(docs)) | |
| return context |