# For reading credentials from the .env file
import os
from dotenv import load_dotenv

from sentence_transformers import SentenceTransformer
from chromadb.api.types import EmbeddingFunction

# WML python SDK
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods

import requests
from bs4 import BeautifulSoup
import spacy
import chromadb
# Importing en_core_web_md ensures the spaCy model package is installed
import en_core_web_md
# Important: hardcoding the API key in Python code is not a best practice. We are using
# this approach for the ease of demo setup. In a production application these variables
# can be stored in a .env or a properties file.

# URL of the hosted LLMs is hardcoded because at this time all LLMs share the same endpoint
url = "https://us-south.ml.cloud.ibm.com"

# These global variables will be updated in the get_credentials() function
watsonx_project_id = ""
# Replace with your IBM Cloud API key
api_key = ""


def get_credentials():
    load_dotenv()
    # Update the global variables that will be used for authentication in another function
    globals()["api_key"] = os.getenv("api_key", None)
    globals()["watsonx_project_id"] = os.getenv("project_id", None)

# The get_model function creates an LLM model object with the specified parameters
def get_model(model_type, max_tokens, min_tokens, decoding, temperature, top_k, top_p):
    generate_params = {
        GenParams.MAX_NEW_TOKENS: max_tokens,
        GenParams.MIN_NEW_TOKENS: min_tokens,
        GenParams.DECODING_METHOD: decoding,
        GenParams.TEMPERATURE: temperature,
        GenParams.TOP_K: top_k,
        GenParams.TOP_P: top_p,
    }

    model = Model(
        model_id=model_type,
        params=generate_params,
        credentials={
            "apikey": api_key,
            "url": url
        },
        project_id=watsonx_project_id
    )
    return model
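
# Usage sketch for get_model() (assumes get_credentials() has already populated
# the globals; all parameter values below are illustrative only):
#
#   model = get_model("meta-llama/llama-3-70b-instruct", max_tokens=100,
#                     min_tokens=50, decoding=DecodingMethods.GREEDY,
#                     temperature=0.7, top_k=50, top_p=1)
#   response = model.generate(prompt="What is RAG?")
#   print(response["results"][0]["generated_text"])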

# Variant of get_model used for testing, without the top_k/top_p sampling parameters
def get_model_test(model_type, max_tokens, min_tokens, decoding, temperature):
    generate_params = {
        GenParams.MAX_NEW_TOKENS: max_tokens,
        GenParams.MIN_NEW_TOKENS: min_tokens,
        GenParams.DECODING_METHOD: decoding,
        GenParams.TEMPERATURE: temperature
    }

    model = Model(
        model_id=model_type,
        params=generate_params,
        credentials={
            "apikey": api_key,
            "url": url
        },
        project_id=watsonx_project_id
    )
    return model

# Set up cache directory (consider user-defined location)
current_dir = os.getcwd()
cache_dir = os.path.join(current_dir, ".cache")

# Create cache directory if necessary
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)

# Set the Hugging Face cache directory
os.environ["HF_HOME"] = cache_dir

# Download the embedding model (specify the correct model identifier)
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
# model_name = "all-MiniLM-L6-v2"
model = SentenceTransformer(model_name, cache_folder=cache_dir)

# Print confirmation message
print(f"Model '{model_name}' downloaded and loaded from cache directory: {cache_dir}")

# Embedding function that wraps the SentenceTransformer model loaded above
class MiniLML6V2EmbeddingFunction(EmbeddingFunction):
    MODEL = model

    def __call__(self, texts):
        # Encode the input texts and return plain Python lists, as Chroma expects
        return MiniLML6V2EmbeddingFunction.MODEL.encode(texts).tolist()
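
# Note: create_embedding() below does not pass this class to Chroma, so the
# collection falls back to Chroma's default embedder. To use it explicitly,
# something like the following should work (collection name is illustrative):
#
#   collection = client.get_or_create_collection(
#       "my_collection", embedding_function=MiniLML6V2EmbeddingFunction())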

def extract_text(url):
    try:
        # Send an HTTP GET request to the URL
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Parse the HTML content of the page using BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract contents of <p> elements
            p_contents = [p.get_text() for p in soup.find_all('p')]

            # Print the contents of <p> elements
            print("\nContents of <p> elements: \n")
            for content in p_contents:
                print(content)

            raw_web_text = " ".join(p_contents)
            # Remove \xa0, which is used in HTML to prevent words from breaking across lines
            cleaned_text = raw_web_text.replace("\xa0", " ")
            return cleaned_text
        else:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

def split_text_into_sentences(text):
    nlp = spacy.load("en_core_web_md")
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    cleaned_sentences = [s.strip() for s in sentences]
    return cleaned_sentences
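
# Quick sanity check for the sentence splitter (illustrative input and output):
#
#   split_text_into_sentences("NLP is fun. It powers chatbots.")
#   -> ["NLP is fun.", "It powers chatbots."]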

def create_embedding(url, collection_name, client):
    cleaned_text = extract_text(url)
    cleaned_sentences = split_text_into_sentences(cleaned_text)

    collection = client.get_or_create_collection(collection_name)

    # Upload text to Chroma, using sentence positions as ids
    collection.upsert(
        documents=cleaned_sentences,
        metadatas=[{"source": str(i)} for i in range(len(cleaned_sentences))],
        ids=[str(i) for i in range(len(cleaned_sentences))],
    )
    return collection
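
# Usage sketch: once the collection is built, the sentences most similar to a
# query can be retrieved (URL, names, and n_results are illustrative):
#
#   collection = create_embedding("https://example.com/article", "demo", chromadb.Client())
#   hits = collection.query(query_texts=["What is NLP?"], n_results=5)
#   print(hits["documents"][0])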

def create_prompt_old(url, question, collection_name, client):
    # Create embeddings for the text file
    collection = create_embedding(url, collection_name, client)

    # Query relevant information
    relevant_chunks = collection.query(
        query_texts=[question],
        n_results=5,
    )
    context = "\n\n\n".join(relevant_chunks["documents"][0])

    # Please note that this is a generic format. You can change this format to be specific to llama
    prompt = (f"{context}\n\nPlease answer the following question in one sentence using this text. "
              "If the question is unanswerable, say \"unanswerable\". "
              "Do not include information that is not relevant to the question.\n"
              f"Question: {question}")
    return prompt

def create_prompt(url, question, collection_name, client):
    try:
        # Create embeddings for the text file
        collection = create_embedding(url, collection_name, client)
    except Exception as e:
        return f"Error creating embeddings: {e}"

    try:
        # Query relevant information
        relevant_chunks = collection.query(
            query_texts=[question],
            n_results=5,
        )
        context = "\n\n\n".join(relevant_chunks["documents"][0])
    except Exception as e:
        return f"Error querying the collection: {e}"

    # Create the prompt using the Llama 3 chat template
    prompt = (
        "<|begin_of_text|>\n"
        "<|start_header_id|>system<|end_header_id|>\n"
        "You are a helpful AI assistant.\n"
        "<|eot_id|>\n"
        "<|start_header_id|>user<|end_header_id|>\n"
        f"### Context:\n{context}\n\n"
        "### Instruction:\n"
        "Please answer the following question based on the above context. Your answer should be concise and directly address the question. "
        "If the question is unanswerable based on the given context, respond with 'unanswerable'.\n\n"
        f"### Question:\n{question}\n"
        "<|eot_id|>\n"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )
    return prompt
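
# Usage sketch for create_prompt() (URL, question, and collection name are illustrative):
#
#   client = chromadb.Client()
#   prompt = create_prompt("https://example.com/article", "What is NLP?",
#                          "demo_collection", client)
#   print(prompt)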

def main():
    # Get the API key and project id and update global variables
    get_credentials()

    # Try different URLs and questions
    url = "https://huggingface.co/learn/nlp-course/chapter1/2?fw=pt"
    question = "What is NLP?"
    collection_name = "test_web_RAG"

    answer_questions_from_web(api_key, watsonx_project_id, url, question, collection_name)

def answer_questions_from_web(request_api_key, request_project_id, url, question, collection_name, client=None):
    # Update the global variables
    globals()["api_key"] = request_api_key
    globals()["watsonx_project_id"] = request_project_id

    # Specify model parameters. create_prompt() builds a Llama 3 chat-format
    # prompt, so a Llama 3 instruct model is used here
    model_type = "meta-llama/llama-3-70b-instruct"
    # model_type = "meta-llama/llama-2-70b-chat"
    max_tokens = 100
    min_tokens = 50
    top_k = 50
    top_p = 1
    # Note: with greedy decoding, temperature, top_k, and top_p have no effect;
    # switch to DecodingMethods.SAMPLE to make them apply
    decoding = DecodingMethods.GREEDY
    temperature = 0.7

    # Get the watsonx model - try both options
    model = get_model(model_type, max_tokens, min_tokens, decoding, temperature, top_k, top_p)

    # Create a Chroma client if the caller did not pass one (main() does not)
    if client is None:
        client = chromadb.Client()

    # Get the prompt
    complete_prompt = create_prompt(url, question, collection_name, client)

    # Let's review the prompt
    print("----------------------------------------------------------------------------------------------------")
    print("*** Prompt:" + complete_prompt + "***")
    print("----------------------------------------------------------------------------------------------------")

    generated_response = model.generate(prompt=complete_prompt)
    response_text = generated_response['results'][0]['generated_text']

    # Remove trailing white spaces
    response_text = response_text.strip()

    # Print model response
    print("--------------------------------- Generated response -----------------------------------")
    print(response_text)
    print("*********************************************************************************************")

    return response_text

# Invoke the main function
if __name__ == "__main__":
    main()
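
# The imports above assume roughly the following packages are installed (an
# illustrative, unpinned install command; exact package versions may differ):
#
#   pip install python-dotenv sentence-transformers chromadb \
#       ibm-watson-machine-learning requests beautifulsoup4 spacy
#   python -m spacy download en_core_web_md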