"""Helpers for a PDF question-answering pipeline.

The functions here extract text from PDFs, split it into chunks, index the
chunks in a Qdrant collection, and build a conversational retrieval chain on
top of the resulting vector store.
"""

import gc
import logging

from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_qdrant import Qdrant
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_google_genai import ChatGoogleGenerativeAI

from src.embeddings import get_embeddings

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_pdf_text(pdf_docs) -> str:
    """Return the concatenated text of every page of every PDF in *pdf_docs*.

    Args:
        pdf_docs: Iterable of objects accepted by ``PdfReader`` (presumably
            paths or binary file-like objects; confirm against the caller).

    Returns:
        The extracted text. If an error occurs part-way through, the text
        gathered up to that point is returned (possibly an empty string);
        the error is logged rather than raised.
    """
    parts = []
    try:
        for pdf in pdf_docs:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                # A page with no extractable text must not abort the whole
                # run: coerce a falsy result (None / "") to an empty string.
                parts.append(page.extract_text() or "")
        logger.info("Extracted the text successfully.")
    except Exception as e:
        # Best-effort by design: keep whatever was extracted before the error.
        logger.error("Error extracting text from PDFs: %s", e)
    gc.collect()
    return "".join(parts)


def get_text_chunks(text: str, chunk_size: int, chunk_overlap: int):
    """Split *text* into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The text to split.
        chunk_size: Passed through as the splitter's ``chunk_size``.
        chunk_overlap: Passed through as the splitter's ``chunk_overlap``.

    Returns:
        The list of chunks, or an empty list if splitting fails (the error
        is logged rather than raised).
    """
    try:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        chunks = text_splitter.split_text(text)
        logger.info("Chunking done successfully.")
        return chunks
    except Exception as e:
        logger.error("Error splitting text into chunks: %s", e)
        return []
    finally:
        gc.collect()


def get_vector_store(chunks, target_collection, url, api_key):
    """Embed *chunks* and store them in a Qdrant collection.

    Args:
        chunks: Text chunks to index.
        target_collection: Name of the Qdrant collection to write to.
        url: URL of the Qdrant instance.
        api_key: API key for the Qdrant instance.

    Returns:
        The created vector store, or ``None`` if *chunks* is empty or the
        store could not be created (the error is logged rather than raised).
    """
    if not chunks:
        # Nothing to index; fail fast instead of calling the embedding
        # backend and Qdrant with an empty batch.
        logger.error("Error creating vector store: no text chunks provided.")
        return None

    try:
        vector_store = Qdrant.from_texts(
            chunks,
            embedding=get_embeddings(),
            url=url,
            api_key=api_key,
            prefer_grpc=False,
            collection_name=target_collection,
            timeout=75,
        )
        logger.info("Vector store created successfully.")
        logger.debug("Vector store: %s", vector_store)
        return vector_store
    except Exception as e:
        logger.error("Error creating vector store: %s", e)
        return None
    finally:
        gc.collect()


def get_conversational_chain(vector_store, google_api_key):
    """Build a conversational retrieval chain backed by *vector_store*.

    The chain uses the ``gemini-1.5-pro`` chat model and a
    ``ConversationBufferMemory`` stored under the ``chat_history`` key.

    Args:
        vector_store: Vector store exposing ``as_retriever()``.
        google_api_key: API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        The conversation chain, or ``None`` if *vector_store* is ``None`` or
        the chain could not be built (the error is logged rather than raised).
    """
    if vector_store is None:
        # get_vector_store() signals failure with None; report that directly
        # instead of surfacing it as an AttributeError further down.
        logger.error("Error creating conversational chain: vector store is None.")
        return None

    try:
        llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-pro",
            google_api_key=google_api_key,
        )
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vector_store.as_retriever(),
            memory=memory,
        )
        return conversation_chain
    except Exception as e:
        logger.error("Error creating conversational chain: %s", e)
        return None
    finally:
        gc.collect()