Spaces:
Sleeping
Sleeping
File size: 2,497 Bytes
c4bb379 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# -*- coding: utf-8 -*-
"""RagBot (2).ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1pDSwQZ5XyUQf_efd7Y1dJsLE_L8JmEda
"""
# Notebook setup: install the third-party packages this notebook relies on.
# These lines are IPython/Colab shell escapes (`!`), not Python statements,
# so this file only runs as-is inside a notebook environment.
# NOTE(review): no versions are pinned, so each run installs whatever is
# latest at that time — consider pinning to keep the notebook reproducible.
# NOTE(review): nothing in the visible code imports from
# llama-index-readers-web — confirm it is still needed.
!pip install pypdf
!pip install -q transformers einops accelerate langchain bitsandbytes
!pip install sentence_transformers
!pip install llama_index
!pip install llama-index-llms-huggingface
!pip install llama-index-readers-web
!pip install llama-index-embeddings-langchain
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt
# Load the source files found in the Colab data directory into documents
# that the index is later built from.
reader = SimpleDirectoryReader('/content/data')
documents = reader.load_data()
# Bare expression kept from the notebook: it shows the loaded documents as
# the cell output and has no effect when run as a plain script.
documents
# System instructions; passed to HuggingFaceLLM as its `system_prompt`.
system_prompts = """
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""
# Wrap every query in the instruction markers that Llama 2 chat models are
# trained on ([INST] ... [/INST]). The previous template used
# <|USER|>/<|ASSISTANT|> markers, which belong to a different model family
# and carry no special meaning for the Llama-2-7b-chat checkpoint used here.
# The system prompt is supplied to the LLM separately, so it is deliberately
# not embedded in a <<SYS>> section of this template.
query_wrapper_prompt = SimpleInputPrompt("[INST] {query_str} [/INST]")
# Log in to the Hugging Face Hub before the model is downloaded below
# (presumably required because the meta-llama checkpoint needs an
# authenticated account — confirm).
# NOTE(review): `!!` is IPython's output-capturing shell escape (equivalent
# to %sx); the single-bang `!huggingface-cli login` is the usual form for an
# interactive prompt — confirm the double bang is intentional.
!!huggingface-cli login
import torch
# One checkpoint id serves as both the tokenizer and the model source.
LLAMA_MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"

# Generation settings: sampling is switched off, so decoding is greedy.
# NOTE(review): with do_sample=False the temperature value should not
# influence the output — confirm it is only kept for documentation purposes.
generation_options = {"temperature": 0.0, "do_sample": False}

# Model-loading settings: half-precision dtype together with 8-bit loading
# to reduce memory usage. This is active (not commented out); 8-bit loading
# presumably requires a CUDA GPU via bitsandbytes — confirm before running
# on a CPU-only runtime.
loader_options = {"torch_dtype": torch.float16, "load_in_8bit": True}

# Local Hugging Face LLM used to answer queries, configured with the system
# prompt and query wrapper defined earlier in the notebook.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs=generation_options,
    system_prompt=system_prompts,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=LLAMA_MODEL_ID,
    model_name=LLAMA_MODEL_ID,
    device_map="auto",
    model_kwargs=loader_options,
)
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
from llama_index.embeddings.langchain import LangchainEmbedding
# Embedding model: a sentence-transformers checkpoint loaded through
# LangChain's HuggingFaceEmbeddings and adapted to LlamaIndex's embedding
# interface by LangchainEmbedding.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)

# ServiceContext is deprecated in llama-index 0.10 and rejected by later
# releases, and the install step above does not pin a version. The documented
# replacement is the global Settings object: components created afterwards
# pick up the LLM, embedding model and chunk size configured here.
from llama_index.core import Settings

Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 1024

# Build the vector index over the loaded documents using the settings above.
index = VectorStoreIndex.from_documents(documents)
# Query engine over the vector index, created with default options.
query_engine = index.as_query_engine()

# Sample questions run against the indexed documents. The strings are sent
# to the model verbatim, so their wording is kept exactly as authored.
questions = (
    "who is ceo of i2e",
    "who is ceo of google",
    "who is vishal, give a short discription of him",
    "who is vishal, give a long discription of him",
    "what is i2e",
    "about home page",
)

# Ask each question in order and print the answer; `response` ends up
# holding the answer to the last question, as in the original cell sequence.
for question in questions:
    response = query_engine.query(question)
    print(response)