Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| from typing import Iterable | |
| import gradio as gr | |
| from gradio.themes.base import Base | |
| from gradio.themes.utils import colors, fonts, sizes | |
| import time | |
| from transformers import pipeline | |
| from sentence_transformers import SentenceTransformer, util | |
| import numpy as np | |
| import openai | |
| import gradio as gr | |
| import os | |
| from langchain.document_loaders import PyMuPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import Chroma | |
| from langchain.embeddings import OpenAIEmbeddings | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.chains import RetrievalQA | |
| from langchain.document_loaders import DirectoryLoader | |
| from langchain.vectorstores import FAISS | |
| import glob | |
| import pandas as pd | |
| import re | |
| from openai.embeddings_utils import get_embedding, cosine_similarity | |
| import tiktoken | |
| import base64 | |
| import time | |
# The OpenAI key is read from the environment; it is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Sentence encoder loaded once at import time and shared by every request.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Reference phrases paired with the image returned when a question matches them.
# Keeping each phrase next to its image prevents the two lists drifting apart.
_IMAGE_TOPICS = (
    ("SCBGOLD policy", '/image/SCBGOLD-Diagram.png'),
    ("SCBGOLD risk", '/image/SCBGOLD-Risk.png'),
    ("SCBGOLD value", '/image/SCBGOLD-Chart.png'),
    ("SCBGOLD price", '/image/SCBGOLD-Chart.png'),
    ("O.R. OR Stock policy detail what is", '/image/OR-Stock.png'),
    ("why O.R. OR stock go down news risk", '/image/OR-Risk.jpg'),
    ("O.R. OR value", '/image/OR-Chart.png'),
    ("O.R. OR price", '/image/OR-Chart.png'),
)

# Minimum cosine similarity for a question to count as matching a topic.
_IMAGE_MATCH_THRESHOLD = 0.37

# Embeddings of the reference phrases; filled on first use and then reused,
# because the phrases never change between calls.
_topic_embeddings = None


def input_to_image(input_en_sentence):
    """Return the image path whose reference phrase best matches the input.

    The input sentence and the fixed reference phrases are embedded with the
    module-level ``model`` and compared by cosine similarity.

    Args:
        input_en_sentence: Text to match against the reference phrases.

    Returns:
        The image path paired with the best-matching phrase when its score is
        at least ``_IMAGE_MATCH_THRESHOLD``; otherwise the string ``'None'``
        (a string, not the ``None`` object, which callers compare against).
    """
    global _topic_embeddings
    if _topic_embeddings is None:
        _topic_embeddings = model.encode([phrase for phrase, _ in _IMAGE_TOPICS])

    input_embedding = model.encode(input_en_sentence)
    similarity_scores = util.pytorch_cos_sim(input_embedding, _topic_embeddings)
    print(similarity_scores)

    # Row 0 holds the scores of the single input against every phrase.
    scores = similarity_scores.numpy()[0]
    best_index = int(np.argmax(scores))
    if scores[best_index] >= _IMAGE_MATCH_THRESHOLD:
        return _IMAGE_TOPICS[best_index][1]
    return 'None'
# Monochrome theme whose font list names the Noto Sans Thai Google font four
# times, with a few text-weight and link-colour overrides.
theme_1 = gr.themes.Monochrome(
    font=[gr.themes.GoogleFont('Noto Sans Thai') for _ in range(4)],
).set(
    link_text_color='*primary_600',
    prose_text_weight='300',
    block_label_text_weight='500',
)
# Custom stylesheet for the chat interface.
# The `.message…` rule previously lacked its closing brace, which swallowed
# every rule that followed it; the brace is restored below.
# NOTE(review): `.gradio_container` is spelled with an underscore while the
# versioned selector further down uses `gradio-container-…`; this rule may
# match nothing -- confirm the intended class name before changing it.
css_1 = """
.message-wrap.svelte-1pjfiar>div.svelte-1pjfiar .svelte-1pjfiar:not(.avatar-container) img {
    border-radius: 0 !important;
    max-height: none !important;
    max-width: 40vw !important;
}
.gradio_container {
    background: linear-gradient(to right, blue, green);
}
.gallery.svelte-1viwdyg {
    color: black;
}
.message.svelte-1pjfiar.svelte-1pjfiar.svelte-1pjfiar {
    background: white;
    position: relative;
    display: flex;
    flex-direction: column;
    align-self: flex-end;
    text-align: left;
    background: var(--background-fill-secondary);
    width: calc(65% - var(--spacing-xxl));
    color: var(--body-text-color);
    font-size: var(--text-lg);
    line-height: var(--line-lg);
    overflow-wrap: break-word;
    overflow-x: hidden;
    padding-right: calc(var(--spacing-xxl) + var(--spacing-md));
    padding: calc(var(--spacing-sm) + var(--spacing-sm));
    box-shadow: rgba(0, 0, 0, 0.16) 0px 1px 4px;
    border: none;
}
.img {
    border-radius: 0 !important;
    max-height: 400px !important;
    max-width: none !important;
}
.message-wrap.svelte-1pjfiar>div.svelte-1pjfiar .svelte-1pjfiar:not(.avatar-container) img {
    border-radius: 0 !important;
    max-height: none !important;
    max-width: 40vw !important;
}
.label.svelte-13hsdno.svelte-13hsdno.svelte-13hsdno {
    color: black
}
.gradio-container-4-1-2 .prose > *:first-child {
    display: flex;
    justify-content: center;
    font-size: 50px;
    font-weight: bold;
    margin-top: 2px;
    font-family: 'Inter';
}
"""
# HTML blurb handed to the chat interface as its description.
description = (
    "<p>FundLearn Chatbot is your trusted companion on the journey to financial "
    "literacy and investment success in Malaysia. Powered by cutting-edge Language "
    "Model technology (LLM), FundLearn brings you a seamless and interactive "
    "learning experience tailored to the unique landscape of the Malaysian "
    "investment market.</p>"
)

# Aliases used when the interface is constructed.
css = css_1
theme = theme_1

# Configure the OpenAI client with the module-level key.
openai.api_key = OPENAI_API_KEY
# System prompt sent as the first message of every request.
_PERSONA = """
You are the good advice investor chatbot teach people to understanding the basics,
risk management strategies, and methods for portfolio diversification.
Be an AI-guided education on these topics, along with some practical tips and advice for getting started in both stock market investing in Bursa Malaysia
"""

# FAISS index per PDF path, built on first use so the PDFs are not re-loaded
# and re-embedded on every chat message.
_faiss_index_cache = {}


def _image_to_base64(image_path):
    """Return the file at *image_path* encoded as a base64 ``data:`` URI."""
    import mimetypes  # local import: not among the file's top-level imports

    # Label the payload from the file extension; fall back to the previously
    # hard-coded JPEG type when the extension is not recognised.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    with open(image_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode()
    return f"data:{mime_type};base64,{encoded_string}"


def _normalize_text(s, sep_token=" \n "):
    """Collapse whitespace runs and tidy doubled full stops in *s*.

    ``sep_token`` is accepted but not used.
    """
    s = re.sub(r'\s+', ' ', s).strip()
    # NOTE(review): the "." is unescaped, so this removes ANY character
    # followed by " ," -- confirm whether a literal full stop was intended.
    s = re.sub(r". ,", "", s)
    s = s.replace("..", ".")
    s = s.replace(". .", ".")
    s = s.replace("\n", "")
    return s.strip()


def _sim_text(input_text):
    """Return the PDF passage most similar to *input_text*, or "" if none.

    Each PDF under ``/pdf`` contributes its five nearest passages; those
    candidates are then re-ranked by cosine similarity against the query
    embedding and the single best passage is returned.
    """
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

    passages = []
    for path in glob.glob('/pdf/*.pdf'):
        faiss_index = _faiss_index_cache.get(path)
        if faiss_index is None:
            pages = PyPDFLoader(path).load_and_split()
            if not pages:
                # Nothing to index in this PDF.
                continue
            faiss_index = FAISS.from_documents(pages, embeddings)
            _faiss_index_cache[path] = faiss_index
        passages.extend(
            doc.page_content
            for doc in faiss_index.similarity_search(input_text, k=5)
        )
    if not passages:
        return ""

    df = pd.DataFrame({'text': [_normalize_text(passage) for passage in passages]})
    tokenizer = tiktoken.get_encoding("cl100k_base")
    df['n_tokens'] = df['text'].apply(lambda x: len(tokenizer.encode(x)))
    # .copy() so the column assignments below write to a real frame rather
    # than to a filtered view of the previous one.
    df = df[df.n_tokens < 8192].copy()
    if df.empty:
        return ""

    df['ada_v2'] = df['text'].apply(embeddings.embed_query)
    embedding = get_embedding(input_text, engine="text-embedding-ada-002")
    df['similarities'] = df.ada_v2.apply(lambda x: cosine_similarity(x, embedding))
    res = df.sort_values("similarities", ascending=False).head(3)
    # Only the top-ranked passage is passed on as context.
    return " \n ".join(res.text[:1].values)


def predict(message, history):
    """Stream a chatbot reply to *message*, optionally followed by an image.

    Args:
        message: The user's latest chat message.
        history: Previous turns as ``(user, assistant)`` pairs; only the most
            recent pair is forwarded to the model.

    Yields:
        The reply accumulated so far, once per streamed chunk, and finally
        the complete reply (with an ``<img>`` tag appended when
        ``input_to_image`` finds a matching image that can be read).
    """
    start_time = time.perf_counter()
    history_openai_format = [{"role": "system", "content": _PERSONA}]
    for human, assistant in history[-1:]:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})

    context = _sim_text(message)
    if context:
        history_openai_format.append({"role": "assistant", "content": context})
    history_openai_format.append({"role": "user", "content": message})
    # Elapsed time is end minus start (it was previously reversed and negative).
    print("history Execution time: ", time.perf_counter() - start_time)

    start_time = time.perf_counter()
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo-0125',
        messages=history_openai_format,
        temperature=0.1,
        stream=True,
    )
    print("response Execution time: ", time.perf_counter() - start_time)

    partial_message = ""
    for chunk in response:
        try:
            chunk_message = chunk['choices'][0]['delta']['content']
        except (KeyError, IndexError):
            # Chunks without a text delta are skipped; other errors propagate.
            continue
        if not chunk_message:
            continue
        partial_message += chunk_message
        yield partial_message

    image_path = input_to_image(message)
    if image_path != 'None':
        try:
            base64_image = _image_to_base64(image_path)
        except OSError as exc:
            # A missing or unreadable image must not discard the text answer.
            print(f"Could not load image {image_path}: {exc}")
        else:
            partial_message += f"<br><br><img src='{base64_image}' height='20vh'>"
            print('Show image!')
    yield partial_message
# Sample prompts offered to the user in the chat UI.
examples = [
    "How can I start investing in the Bursa Malaysia as a beginner?",
    "What are some popular investment options available in Malaysia?",
    "What are the key factors to consider before investing in a property in Malaysia",
]

# Build the chat interface around `predict`, enable queuing, and launch it.
chat_interface = gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height=600),
    css=css,
    theme=theme,
    examples=examples,
    title='FundLearn Chatbot',
    description=description,
    retry_btn=None,
    undo_btn=None,
)
chat_interface.queue().launch(share=True, debug=True)