Spaces:

pondsaga
/

fund-learn-chatbot

Runtime error

App Files Files Community

fund-learn-chatbot / app.py

pondsaga

Upload app.py

953aac1 verified almost 2 years ago

raw

history blame contribute delete

8.54 kB

	from __future__ import annotations
	from typing import Iterable
	import gradio as gr
	from gradio.themes.base import Base
	from gradio.themes.utils import colors, fonts, sizes
	import time
	from transformers import pipeline
	from sentence_transformers import SentenceTransformer, util
	import numpy as np
	import openai
	import gradio as gr
	import os
	from langchain.document_loaders import PyMuPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.vectorstores import Chroma
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.chat_models import ChatOpenAI
	from langchain.document_loaders import PyPDFLoader
	from langchain.chains import RetrievalQA
	from langchain.document_loaders import DirectoryLoader
	from langchain.vectorstores import FAISS
	import glob
	import pandas as pd
	import re
	from openai.embeddings_utils import get_embedding, cosine_similarity
	import tiktoken
	import base64
	import time

	# The OpenAI credential is taken from the process environment; it is None
	# when the variable is not set.
	OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

	# Sentence-embedding model shared by every call that matches user text
	# against the illustration topics.
	model = SentenceTransformer(
	    'sentence-transformers/all-MiniLM-L6-v2'
	)

	def input_to_image(input_en_sentence, threshold=0.37):
	    """Pick the illustration whose topic best matches an English sentence.

	    The sentence and a fixed list of topic phrases are embedded with the
	    module-level ``model`` and compared by cosine similarity. Each topic
	    phrase is paired with one image path.

	    Args:
	        input_en_sentence: Text to match against the topic phrases.
	        threshold: Minimum cosine similarity the best topic must reach for
	            its image to be returned. Defaults to 0.37, the value that was
	            previously hard-coded.

	    Returns:
	        The image path of the best-matching topic, or the string ``'None'``
	        (a string, not the ``None`` object) when no topic reaches
	        ``threshold``.
	    """
	    # Topic phrase -> image path, kept as pairs so the two can never get
	    # out of step with each other.
	    topic_images = (
	        ("SCBGOLD policy", '/image/SCBGOLD-Diagram.png'),
	        ("SCBGOLD risk", '/image/SCBGOLD-Risk.png'),
	        ("SCBGOLD value", '/image/SCBGOLD-Chart.png'),
	        ("SCBGOLD price", '/image/SCBGOLD-Chart.png'),
	        ("O.R. OR Stock policy detail what is", '/image/OR-Stock.png'),
	        ("why O.R. OR stock go down news risk", '/image/OR-Risk.jpg'),
	        ("O.R. OR value", '/image/OR-Chart.png'),
	        ("O.R. OR price", '/image/OR-Chart.png'),
	    )
	    sentences = [topic for topic, _ in topic_images]
	    image_urls = [url for _, url in topic_images]

	    input_embedding = model.encode(input_en_sentence)
	    sentence_embeddings = model.encode(sentences)

	    similarity_scores = util.pytorch_cos_sim(input_embedding, sentence_embeddings)
	    print(similarity_scores)

	    # Convert once; .cpu() is a no-op for CPU tensors and keeps the NumPy
	    # conversion valid if the scores ever live on an accelerator.
	    scores = similarity_scores.cpu().numpy()[0]
	    index_max = int(np.argmax(scores))

	    if scores[index_max] >= threshold:
	        return image_urls[index_max]
	    return 'None'

	# Monochrome base theme with the same Google font ('Noto Sans Thai')
	# supplied for each of the four font entries, plus three style overrides.
	theme_1 = gr.themes.Monochrome(
	    font=[gr.themes.GoogleFont('Noto Sans Thai') for _ in range(4)],
	).set(
	    link_text_color='*primary_600',
	    prose_text_weight='300',
	    block_label_text_weight='500',
	)

	# Custom stylesheet handed to the Gradio UI.
	# Fix: the `.message...` rule was missing its closing brace, which left
	# every rule after it inside an unterminated block.
	# NOTE(review): `.gradio_container` uses an underscore while the last rule
	# targets `.gradio-container-4-1-2`; confirm which class name is intended.
	css_1 = """

	.message-wrap.svelte-1pjfiar>div.svelte-1pjfiar .svelte-1pjfiar:not(.avatar-container) img {
	border-radius: 0 !important;
	max-height: none !important;
	max-width: 40vw !important;
	}

	.gradio_container {
	background: linear-gradient(to right, blue, green);
	}

	.gallery.svelte-1viwdyg {
	color: black;
	}

	.message.svelte-1pjfiar.svelte-1pjfiar.svelte-1pjfiar {
	background: white;
	position: relative;
	display: flex;
	flex-direction: column;
	align-self: flex-end;
	text-align: left;
	background: var(--background-fill-secondary);
	width: calc(65% - var(--spacing-xxl));
	color: var(--body-text-color);
	font-size: var(--text-lg);
	line-height: var(--line-lg);
	overflow-wrap: break-word;
	overflow-x: hidden;
	padding-right: calc(var(--spacing-xxl) + var(--spacing-md));
	padding: calc(var(--spacing-sm) + var(--spacing-sm));
	box-shadow: rgba(0, 0, 0, 0.16) 0px 1px 4px;
	border: none;
	}

	.img {
	border-radius: 0 !important;
	max-height: 400px !important;
	max-width: none !important;
	}

	.message-wrap.svelte-1pjfiar>div.svelte-1pjfiar .svelte-1pjfiar:not(.avatar-container) img {
	border-radius: 0 !important;
	max-height: none !important;
	max-width: 40vw !important;
	}

	.label.svelte-13hsdno.svelte-13hsdno.svelte-13hsdno {
	color: black
	}

	.gradio-container-4-1-2 .prose > *:first-child {
	display: flex;
	justify-content: center;
	font-size: 50px;
	font-weight: bold;
	margin-top: 2px;
	font-family: 'Inter';
	}

	"""

	# HTML blurb passed to the chat interface as its description.
	description = (
	    "<p>FundLearn Chatbot is your trusted companion on the journey to financial "
	    "literacy and investment success in Malaysia. Powered by cutting-edge "
	    "Language Model technology (LLM), FundLearn brings you a seamless and "
	    "interactive learning experience tailored to the unique landscape of the "
	    "Malaysian investment market.</p>"
	)

	# Active stylesheet and theme used when the interface is built.
	css, theme = css_1, theme_1

	# Hand the key read from the environment to the openai client module.
	openai.api_key = OPENAI_API_KEY

	def predict(message, history):
	    """Stream a chatbot reply for ``message``, optionally ending with an image.

	    Generator passed to ``gr.ChatInterface``. It builds an OpenAI chat
	    prompt from a fixed persona, the most recent history turn, a passage
	    retrieved from the PDFs under ``/pdf`` and the new message, then yields
	    the growing reply as chunks arrive. When ``input_to_image`` finds a
	    matching illustration, one more value is yielded with an inline
	    ``<img>`` tag appended.

	    Args:
	        message: The user's latest message.
	        history: Earlier turns as ``(user, assistant)`` pairs; only the
	            last pair is forwarded to the model.

	    Yields:
	        The reply text accumulated so far, after every streamed chunk that
	        carries content, and finally the reply plus the image markup when
	        an image matched and could be read.
	    """
	    def image_to_base64(image_path):
	        """Return the file at ``image_path`` as a base64 ``data:`` URI."""
	        import mimetypes  # local import keeps this block self-contained

	        # Label the payload with the file's real type (most assets are
	        # PNG); fall back to the previous fixed JPEG label when unknown.
	        mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
	        with open(image_path, "rb") as image_file:
	            encoded_string = base64.b64encode(image_file.read()).decode()
	        return f"data:{mime_type};base64,{encoded_string}"

	    def normalize_text(s, sep_token = " \n "):
	        """Collapse whitespace and tidy stray punctuation in ``s``.

	        ``sep_token`` is accepted for compatibility but is not used.
	        """
	        # After this substitution no newline remains in the text.
	        s = re.sub(r'\s+', ' ', s).strip()
	        # The dot is escaped so only a literal ". ," is removed; unescaped,
	        # it matched any character and ate the last letter of a word
	        # standing before " ,".
	        s = re.sub(r"\. ,", "", s)
	        s = s.replace("..", ".")
	        s = s.replace(". .", ".")
	        return s.strip()

	    def sim_text(input_text):
	        """Return the PDF passage most similar to ``input_text``.

	        Returns an empty string when no passage is available.
	        """
	        embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

	        # Gather up to five candidate passages from every PDF.
	        # NOTE(review): an index is built per PDF on every call; caching
	        # them would avoid re-embedding all pages for each message.
	        passages = []
	        for path in glob.glob('/pdf/*.pdf'):
	            pages = PyPDFLoader(path).load_and_split()
	            if not pages:
	                continue  # nothing to index in this file
	            faiss_index = FAISS.from_documents(pages, embeddings)
	            docs = faiss_index.similarity_search(input_text, k=5)
	            passages.extend(doc.page_content for doc in docs)

	        if not passages:
	            return ""  # no context: skip the embedding requests entirely

	        df = pd.DataFrame({'text': [normalize_text(p) for p in passages]})
	        tokenizer = tiktoken.get_encoding("cl100k_base")
	        df['n_tokens'] = df["text"].apply(lambda x: len(tokenizer.encode(x)))
	        # .copy() so the column assignments below act on an independent
	        # frame rather than on a filtered view.
	        df = df[df.n_tokens < 8192].copy()
	        if df.empty:
	            return ""

	        df['ada_v2'] = df["text"].apply(embeddings.embed_query)

	        embedding = get_embedding(
	            input_text,
	            engine="text-embedding-ada-002"  # engine should be set to the deployment name you chose when you deployed the text-embedding-ada-002 (Version 2) model
	        )

	        df["similarities"] = df.ada_v2.apply(lambda x: cosine_similarity(x, embedding))
	        # Only the single best passage is used as context.
	        best = df.sort_values("similarities", ascending=False).head(1)
	        return " \n ".join(best.text.values)

	    start_time = time.time()
	    persona = """
	You are the good advice investor chatbot teach people to understanding the basics,
	risk management strategies, and methods for portfolio diversification.
	Be an AI-guided education on these topics, along with some practical tips and advice for getting started in both stock market investing in Bursa Malaysia
	"""
	    history_openai_format = [{"role": "system", "content": persona}]
	    # Slicing an empty history yields nothing, so no length check is needed.
	    for human, assistant in history[-1:]:
	        history_openai_format.append({"role": "user", "content": human})
	        history_openai_format.append({"role": "assistant", "content": assistant})
	    history_openai_format.append({"role": "assistant", "content": sim_text(message)})
	    history_openai_format.append({"role": "user", "content": message})
	    # Elapsed time is end minus start (it used to be printed negated).
	    execution_time = time.time() - start_time
	    print("history Execution time: ", execution_time)

	    start_time = time.time()
	    response = openai.ChatCompletion.create(
	        model='gpt-3.5-turbo-0125',
	        messages=history_openai_format,
	        temperature=0.1,
	        stream=True
	    )
	    execution_time = time.time() - start_time
	    print("response Execution time: ", execution_time)

	    partial_message = ""
	    for chunk in response:
	        # Some chunks carry no text; skip those instead of hiding every
	        # possible error behind a bare except.
	        try:
	            chunk_message = chunk['choices'][0]['delta']['content']
	        except (KeyError, IndexError):
	            continue
	        if chunk_message is None:
	            continue
	        partial_message = partial_message + chunk_message
	        yield partial_message

	    image_path = input_to_image(message)

	    if image_path != 'None':
	        try:
	            base64_image = image_to_base64(image_path)
	        except OSError as err:
	            # The text answer has already been delivered; an unreadable
	            # illustration should not turn it into an error.
	            print(f"Could not load image {image_path}: {err}")
	            return
	        image_text = f"<br><br><img src='{base64_image}' height='20vh'>"
	        partial_message += image_text
	        print('Show image!')
	        yield partial_message

	# Starter questions offered as examples in the chat UI.
	examples = [
	    "How can I start investing in the Bursa Malaysia as a beginner?",
	    "What are some popular investment options available in Malaysia?",
	    "What are the key factors to consider before investing in a property in Malaysia",
	]

	# Build the chat interface around predict, then enable queuing and launch
	# it with a share link and debug mode on.
	chat_ui = gr.ChatInterface(
	    predict,
	    chatbot=gr.Chatbot(height=600),
	    css=css,
	    theme=theme,
	    examples=examples,
	    title='FundLearn Chatbot',
	    description=description,
	    retry_btn=None,
	    undo_btn=None,
	)
	chat_ui.queue().launch(share=True, debug=True)