| import os
|
| from langchain_community.document_loaders import TextLoader
|
| from langchain.text_splitter import CharacterTextSplitter
|
| from langchain_huggingface import HuggingFaceEmbeddings
|
| from langchain_chroma import Chroma
|
| from langchain_huggingface import HuggingFacePipeline
|
| from langchain.chains import RetrievalQA
|
| from transformers import pipeline
|
| import json
|
| from langchain_huggingface import HuggingFaceEmbeddings
|
| from langchain_community.document_loaders import TextLoader
|
|
|
|
|
|
|
|
|
def load_and_preprocess(file_path):
    """Load a text file and split it into overlapping chunks.

    Args:
        file_path: Path of the text file to ingest.

    Returns:
        List of document chunks, 1000 characters each with a
        100-character overlap between consecutive chunks.
    """
    docs = TextLoader(file_path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return splitter.split_documents(docs)
|
|
|
|
|
def create_vector_store(documents, persist_directory):
    """Embed documents and persist them in a Chroma vector store.

    Args:
        documents: Pre-chunked documents to index.
        persist_directory: Directory where Chroma persists the index.

    Returns:
        The populated Chroma vector store.
    """
    return Chroma.from_documents(
        documents,
        HuggingFaceEmbeddings(),
        persist_directory=persist_directory,
    )
|
|
|
|
|
def initialize_llm():
    """Wrap a local GPT-2 text-generation pipeline as a LangChain LLM.

    Returns:
        A HuggingFacePipeline LLM backed by the 'gpt2' model.
    """
    hf_generator = pipeline('text-generation', model='gpt2')
    return HuggingFacePipeline(pipeline=hf_generator)
|
|
|
|
|
def build_rag_pipeline(vector_store, llm):
    """Assemble a RetrievalQA chain over the given vector store.

    Args:
        vector_store: Chroma store used to retrieve context documents.
        llm: LangChain LLM that answers from the retrieved context.

    Returns:
        A RetrievalQA chain that retrieves the top-3 most similar
        chunks for each query and feeds them to the LLM.
    """
    retriever = vector_store.as_retriever(
        search_type="similarity", search_kwargs={"k": 3}
    )
    # BUG FIX: RetrievalQA cannot be instantiated directly from llm +
    # retriever -- it requires a combine_documents_chain and raises a
    # validation error otherwise. The from_chain_type factory builds the
    # default "stuff" documents chain from the LLM for us.
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
|
|
|
|
|
def main(
    gita_path=r'C:\LLM_summerizer\bhagavad_gita_verses.csv',
    sutras_path=r'C:\LLM_summerizer\yoga_raw.txt',
    query="What does the Bhagavad Gita say about selfless action?",
):
    """Build a RAG pipeline over two scripture files and answer a query.

    Paths and query were previously hard-coded; they are now keyword
    parameters with the original values as defaults, so existing
    callers of ``main()`` are unaffected.

    Args:
        gita_path: Path to the Bhagavad Gita source file.
        sutras_path: Path to the Yoga Sutras source file.
        query: Question to answer against the combined corpus.

    Side effects:
        Persists a Chroma index under 'vector_store' and prints the
        query/answer pair as JSON to stdout.
    """
    documents = load_and_preprocess(gita_path) + load_and_preprocess(sutras_path)

    vector_store = create_vector_store(documents, persist_directory='vector_store')

    llm = initialize_llm()

    rag_pipeline = build_rag_pipeline(vector_store, llm)

    result = rag_pipeline.run(query)

    output = {
        "query": query,
        "answer": result,
    }
    print(json.dumps(output, indent=2))
|
|
|
# Run the end-to-end RAG demo only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":

    main()
|
|
|