# RagChatBot / RagBotAssignment.py
# Tanmay211998's picture
# Upload RagBotAssignment.py
# c4bb379 verified
# -*- coding: utf-8 -*-
"""RagBot (2).ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1pDSwQZ5XyUQf_efd7Y1dJsLE_L8JmEda
"""
# Install notebook dependencies. The leading "!" is IPython shell magic —
# these lines only run inside a notebook (Colab/Jupyter), not as plain Python.
!pip install pypdf
!pip install -q transformers einops accelerate langchain bitsandbytes
!pip install sentence_transformers
!pip install llama_index
!pip install llama-index-llms-huggingface
!pip install llama-index-readers-web
!pip install llama-index-embeddings-langchain
# Core llama_index imports: the vector index builder, the local-directory
# document loader, and the (legacy) ServiceContext configuration container.
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt
# Load every file under /content/data into Document objects for indexing.
documents = SimpleDirectoryReader('/content/data').load_data()
# Bare expression: displays the loaded documents in the notebook output cell
# (a no-op when run as a plain script).
documents
# System prompt injected ahead of every query sent to the LLM (passed to
# HuggingFaceLLM below as system_prompt).
system_prompts = """
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""
## Default format supportable by LLama2
# Wraps the raw user query in Llama-2 chat role tokens before generation.
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
# IPython "!!" magic: runs the shell command and captures its output as a list.
# NOTE(review): `huggingface-cli login` is normally interactive — confirm this
# actually authenticates in Colab rather than hanging/failing silently.
!!huggingface-cli login
import torch

# Llama-2-7b-chat served through llama_index's HuggingFaceLLM wrapper,
# quantized to fit a single Colab GPU.
# Decoding is greedy (do_sample=False). The original also passed
# "temperature": 0.0, which is ignored when sampling is disabled and makes
# recent `transformers` releases emit a "temperature has no effect" warning,
# so that dead kwarg is dropped.
llm = HuggingFaceLLM(
    context_window=4096,                  # Llama-2 maximum context length
    max_new_tokens=256,                   # cap on generated answer length
    generate_kwargs={"do_sample": False}, # deterministic greedy decoding
    system_prompt=system_prompts,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",                    # spread layers across available devices
    # fp16 weights + 8-bit loading to reduce GPU memory usage
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
)
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
from llama_index.embeddings.langchain import LangchainEmbedding
# Sentence-transformers embedding model (all-mpnet-base-v2) wrapped for
# llama_index through the langchain adapter.
embed_model=LangchainEmbedding(
HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))
# Bundle the LLM, embedder and chunking configuration for index construction.
# NOTE(review): ServiceContext is deprecated in llama_index >= 0.10 in favour
# of the global Settings object — works here, but worth migrating.
service_context=ServiceContext.from_defaults(
chunk_size=1024,
llm=llm,
embed_model=embed_model
)
# Build the vector index over the loaded documents, expose it as a query
# engine, and run a batch of smoke-test questions against it, printing each
# answer as it arrives.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()

questions = (
    "who is ceo of i2e",
    "who is ceo of google",
    "who is vishal, give a short discription of him",
    "who is vishal, give a long discription of him",
    "what is i2e",
    "about home page",
)
for question in questions:
    response = query_engine.query(question)
    print(response)