# -*- coding: utf-8 -*- """RagBot (2).ipynb Automatically generated by Colab. Original file is located at https://colab.research.google.com/drive/1pDSwQZ5XyUQf_efd7Y1dJsLE_L8JmEda """ !pip install pypdf !pip install -q transformers einops accelerate langchain bitsandbytes !pip install sentence_transformers !pip install llama_index !pip install llama-index-llms-huggingface !pip install llama-index-readers-web !pip install llama-index-embeddings-langchain from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext from llama_index.llms.huggingface import HuggingFaceLLM from llama_index.core.prompts.prompts import SimpleInputPrompt documents = SimpleDirectoryReader('/content/data').load_data() documents system_prompts = """ You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided. """ ## Default format supportable by LLama2 query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>") !!huggingface-cli login import torch llm = HuggingFaceLLM( context_window=4096, max_new_tokens=256, generate_kwargs={"temperature": 0.0, "do_sample": False}, system_prompt=system_prompts, query_wrapper_prompt=query_wrapper_prompt, tokenizer_name="meta-llama/Llama-2-7b-chat-hf", model_name="meta-llama/Llama-2-7b-chat-hf", device_map="auto", # uncomment this if using CUDA to reduce memory usage model_kwargs={"torch_dtype": torch.float16 , "load_in_8bit":True} ) from langchain.embeddings.huggingface import HuggingFaceEmbeddings from llama_index.core import ServiceContext from llama_index.embeddings.langchain import LangchainEmbedding embed_model=LangchainEmbedding( HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")) service_context=ServiceContext.from_defaults( chunk_size=1024, llm=llm, embed_model=embed_model ) index=VectorStoreIndex.from_documents(documents,service_context=service_context) query_engine=index.as_query_engine() 
# --- Smoke-test queries against the indexed documents ---
# BUGFIX: "discription" -> "description" in two query strings (the typo
# was sent verbatim to the model and degrades answer quality).
# Refactored six copy-pasted query/print pairs into a single loop.
queries = [
    "who is ceo of i2e",
    "who is ceo of google",
    "who is vishal, give a short description of him",
    "who is vishal, give a long description of him",
    "what is i2e",
    "about home page",
]
for question in queries:
    response = query_engine.query(question)
    print(response)