# RagChatBot / RagBotAssignment.py
# Tanmay211998's picture
# Upload RagBotAssignment.py
# c4bb379 verified
# -*- coding: utf-8 -*-
"""RagBot (2).ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1pDSwQZ5XyUQf_efd7Y1dJsLE_L8JmEda
"""
# Install notebook dependencies. The leading "!" is IPython shell magic —
# these lines only run inside a notebook (Colab/Jupyter), not as plain Python.
!pip install pypdf
!pip install -q transformers einops accelerate langchain bitsandbytes
!pip install sentence_transformers
!pip install llama_index
!pip install llama-index-llms-huggingface
!pip install llama-index-readers-web
!pip install llama-index-embeddings-langchain
# Core llama_index imports: the vector index builder, the local-directory
# document loader, and the (legacy) ServiceContext configuration container.
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt
# Load every file under /content/data into Document objects for indexing.
documents = SimpleDirectoryReader('/content/data').load_data()
# Bare expression: displays the loaded documents in the notebook output cell
# (a no-op when run as a plain script).
documents
# System prompt injected ahead of every query sent to the LLM (passed to
# HuggingFaceLLM below as system_prompt).
system_prompts = """
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""
## Default format supportable by LLama2
# Wraps the raw user query in Llama-2 chat role tokens before generation.
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
# IPython "!!" magic: runs the shell command and captures its output as a list.
# NOTE(review): `huggingface-cli login` is normally interactive — confirm this
# actually authenticates in Colab rather than hanging/failing silently.
!!huggingface-cli login
import torch

# Llama-2-7b-chat served through llama_index's HuggingFaceLLM wrapper,
# quantized to fit a single Colab GPU.
# Decoding is greedy (do_sample=False). The original also passed
# "temperature": 0.0, which is ignored when sampling is disabled and makes
# recent `transformers` releases emit a "temperature has no effect" warning,
# so that dead kwarg is dropped.
llm = HuggingFaceLLM(
    context_window=4096,                  # Llama-2 maximum context length
    max_new_tokens=256,                   # cap on generated answer length
    generate_kwargs={"do_sample": False}, # deterministic greedy decoding
    system_prompt=system_prompts,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",                    # spread layers across available devices
    # fp16 weights + 8-bit loading to reduce GPU memory usage
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
)
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
from llama_index.embeddings.langchain import LangchainEmbedding
# Sentence-transformers embedding model (all-mpnet-base-v2) wrapped for
# llama_index through the langchain adapter.
embed_model=LangchainEmbedding(
HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))
# Bundle the LLM, embedder and chunking configuration for index construction.
# NOTE(review): ServiceContext is deprecated in llama_index >= 0.10 in favour
# of the global Settings object — works here, but worth migrating.
service_context=ServiceContext.from_defaults(
chunk_size=1024,
llm=llm,
embed_model=embed_model
)
# Build the vector index over the loaded documents, expose it as a query
# engine, and run a batch of smoke-test questions against it, printing each
# answer as it arrives.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()

questions = (
    "who is ceo of i2e",
    "who is ceo of google",
    "who is vishal, give a short discription of him",
    "who is vishal, give a long discription of him",
    "what is i2e",
    "about home page",
)
for question in questions:
    response = query_engine.query(question)
    print(response)