hello-rag / src /utils.py
anurag8590
files added
564359b
import gc
import logging
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_qdrant import Qdrant
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI
from src.embeddings import get_embeddings
# Configure logging at INFO level as a side effect of importing this module.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by the helpers in this file.
logger = logging.getLogger(__name__)
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources; each item is passed directly to
            ``PdfReader``.

    Returns:
        The text of all pages, joined in order with no separator. If an
        exception is raised part-way through, it is logged and the text
        extracted before the failure is returned (possibly an empty string).
    """
    page_texts = []
    try:
        for pdf in pdf_docs:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # A page may yield no text; treat that as an empty string so
                # a single such page cannot abort the remaining extraction.
                page_texts.append(page.extract_text() or "")
        logger.info("Extracted the text successfully.")
    except Exception as e:
        logger.error(f"Error extracting text from PDFs: {e}")
    gc.collect()
    # Join once rather than growing a string with ``+=`` inside the loop.
    return "".join(page_texts)
def get_text_chunks(text, chunk_size, chunk_overlap):
    """Split ``text`` into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The text to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The list produced by ``split_text``, or an empty list if any
        exception was raised (the error is logged).
    """
    try:
        pieces = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        ).split_text(text)
        logger.info("Chunking done successfully.")
        gc.collect()
    except Exception as exc:
        logger.error(f"Error splitting text into chunks: {exc}")
        gc.collect()
        return []
    else:
        return pieces
def get_vector_store(chunks, target_collection, url, api_key):
    """Build a Qdrant vector store from text chunks.

    Args:
        chunks: Texts handed to ``Qdrant.from_texts``.
        target_collection: Passed to Qdrant as ``collection_name``.
        url: Passed to Qdrant as ``url``.
        api_key: Passed to Qdrant as ``api_key``.

    Returns:
        The object returned by ``Qdrant.from_texts``, or ``None`` if any
        exception was raised (the error is logged).
    """
    try:
        # Collect the keyword options first; the embeddings are still
        # obtained inside the ``try`` so a failure there is logged as well.
        qdrant_options = {
            "embedding": get_embeddings(),
            "url": url,
            "api_key": api_key,
            "prefer_grpc": False,
            "collection_name": target_collection,
            "timeout": 75,
        }
        store = Qdrant.from_texts(chunks, **qdrant_options)
        logger.info("Vector store created successfully.")
        logger.debug(f"Vector store: {store}")
        gc.collect()
        return store
    except Exception as exc:
        logger.error(f"Error creating vector store: {exc}")
        return None
def get_conversational_chain(vector_store, google_api_key):
    """Create a ``ConversationalRetrievalChain`` over the given vector store.

    Args:
        vector_store: Object whose ``as_retriever()`` supplies the retriever.
        google_api_key: Passed to ``ChatGoogleGenerativeAI`` as
            ``google_api_key``.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``, or
        ``None`` if any exception was raised (the error is logged).
    """
    try:
        chat_model = ChatGoogleGenerativeAI(
            model="gemini-1.5-pro",
            google_api_key=google_api_key,
        )
        history = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )
        # Obtain the retriever only after the model and memory exist, which
        # keeps the same evaluation order as a single inline call.
        retriever = vector_store.as_retriever()
        chain = ConversationalRetrievalChain.from_llm(
            llm=chat_model,
            retriever=retriever,
            memory=history,
        )
        gc.collect()
        return chain
    except Exception as exc:
        logger.error(f"Error creating conversational chain: {exc}")
        return None