Spaces:

pondsaga
/

fund-learn-chatbot

Runtime error

File size: 8,535 Bytes

from __future__ import annotations
from typing import Iterable
import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
import time
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import numpy as np
import openai
import gradio as gr
import os
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader
from langchain.vectorstores import FAISS
import glob
import pandas as pd
import re
from openai.embeddings_utils import get_embedding, cosine_similarity
import tiktoken
import base64
import time

# OpenAI key taken from the process environment; None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Sentence-embedding model shared by the image-matching helper below.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def input_to_image(input_en_sentence, threshold=0.37):
    """Pick the image whose caption best matches an English sentence.

    The sentence and a fixed list of caption phrases are embedded with the
    module-level ``model``; the caption with the highest cosine similarity
    is selected, provided its score reaches ``threshold``.

    Args:
        input_en_sentence: Text to compare against the caption phrases.
        threshold: Minimum cosine similarity required to return an image.
            Defaults to 0.37, the value that used to be hard-coded.

    Returns:
        The image path paired with the best-matching caption, or the string
        ``'None'`` (not the ``None`` object) when no caption scores high
        enough.
    """
    # Each caption is stored next to its image so the two cannot drift out
    # of alignment when an entry is added or removed.
    candidates = (
        ("SCBGOLD policy", '/image/SCBGOLD-Diagram.png'),
        ("SCBGOLD risk", '/image/SCBGOLD-Risk.png'),
        ("SCBGOLD value", '/image/SCBGOLD-Chart.png'),
        ("SCBGOLD price", '/image/SCBGOLD-Chart.png'),
        ("O.R. OR Stock policy detail what is", '/image/OR-Stock.png'),
        ("why O.R. OR stock go down news risk", '/image/OR-Risk.jpg'),
        ("O.R. OR value", '/image/OR-Chart.png'),
        ("O.R. OR price", '/image/OR-Chart.png'),
    )
    sentences = [caption for caption, _ in candidates]

    input_embedding = model.encode(input_en_sentence)
    sentence_embeddings = model.encode(sentences)

    similarity_scores = util.pytorch_cos_sim(input_embedding, sentence_embeddings)
    print(similarity_scores)

    # Convert to NumPy once and take row 0, one score per caption.
    scores = similarity_scores.numpy()[0]
    best = int(np.argmax(scores))
    if scores[best] >= threshold:
        return candidates[best][1]
    return 'None'

# Monochrome Gradio theme: the same Google font ('Noto Sans Thai') fills all
# four entries of the font list, and three theme variables are overridden.
theme_1 = gr.themes.Monochrome(
    font=[gr.themes.GoogleFont('Noto Sans Thai') for _ in range(4)],
).set(
    block_label_text_weight='500',
    link_text_color='*primary_600',
    prose_text_weight='300',
)

# Custom stylesheet for the Gradio chat interface.
# Every rule must end with its own `}`: an unterminated block swallows all
# of the rules that follow it in the sheet.
# NOTE(review): `.gradio_container` is spelled with an underscore, while the
# last rule in this sheet uses the hyphenated `gradio-container-...` form —
# confirm whether the gradient background is meant to apply.
css_1 = """

.message-wrap.svelte-1pjfiar>div.svelte-1pjfiar .svelte-1pjfiar:not(.avatar-container) img {
  border-radius: 0 !important;
  max-height: none !important;
  max-width: 40vw !important;
}

.gradio_container {
    background: linear-gradient(to right, blue, green);
}

.gallery.svelte-1viwdyg {
    color: black;
}

.message.svelte-1pjfiar.svelte-1pjfiar.svelte-1pjfiar {
    background: white;
    position: relative;
    display: flex;
    flex-direction: column;
    align-self: flex-end;
    text-align: left;
    background: var(--background-fill-secondary);
    width: calc(65% - var(--spacing-xxl));
    color: var(--body-text-color);
    font-size: var(--text-lg);
    line-height: var(--line-lg);
    overflow-wrap: break-word;
    overflow-x: hidden;
    padding-right: calc(var(--spacing-xxl) + var(--spacing-md));
    padding: calc(var(--spacing-sm) + var(--spacing-sm));
    box-shadow: rgba(0, 0, 0, 0.16) 0px 1px 4px;
    border: none;
}

.img {
  border-radius: 0 !important;
  max-height: 400px !important;
  max-width: none !important;
}

.message-wrap.svelte-1pjfiar>div.svelte-1pjfiar .svelte-1pjfiar:not(.avatar-container) img {
  border-radius: 0 !important;
  max-height: none !important;
  max-width: 40vw !important;
}

.label.svelte-13hsdno.svelte-13hsdno.svelte-13hsdno {
  color: black
  }

.gradio-container-4-1-2 .prose > *:first-child {
  display: flex;
  justify-content: center;
  font-size: 50px;
  font-weight: bold;
  margin-top: 2px;
  font-family: 'Inter';
}

"""

# HTML blurb passed to the chat interface as its `description`; written as
# adjacent string literals so each source line stays readable.
description = (
    "<p>FundLearn Chatbot is your trusted companion on the journey to "
    "financial literacy and investment success in Malaysia. "
    "Powered by cutting-edge Language Model technology (LLM), "
    "FundLearn brings you a seamless and interactive learning experience "
    "tailored to the unique landscape of the Malaysian investment market.</p>"
)

# Stylesheet and theme selected for the chat interface.
css, theme = css_1, theme_1

# Give the module-level openai client the key held in OPENAI_API_KEY.
openai.api_key = OPENAI_API_KEY

def predict(message, history):
    """Stream a chatbot answer for ``message``, optionally followed by an image.

    Builds an OpenAI chat prompt from the system persona, the most recent
    exchange in ``history``, a passage retrieved from the PDFs under ``/pdf``
    and the new message, then yields the growing answer as it streams in.
    After the stream ends, an image chosen by ``input_to_image`` is appended
    as an inline ``<img>`` tag when one matches.

    Args:
        message: The user's new question.
        history: Earlier turns as ``(user, assistant)`` pairs; only the last
            pair is forwarded to the model.

    Yields:
        The answer accumulated so far (a string), once per received text
        chunk, and one final time after the optional image is appended.
    """
    def image_to_base64(image_path):
        """Return the file at ``image_path`` as a base64 ``data:`` URI."""
        import mimetypes

        # The image paths returned by input_to_image include both .png and
        # .jpg files, so label the payload with the file's own type; fall
        # back to JPEG when the type cannot be guessed.
        mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode()
        return f"data:{mime_type};base64,{encoded_string}"

    def normalize_text(s, sep_token=" \n "):
        """Collapse whitespace and tidy stray punctuation in ``s``.

        ``sep_token`` is accepted but not used.
        """
        s = re.sub(r'\s+', ' ', s).strip()
        # NOTE(review): the leading "." is an unescaped regex wildcard, so
        # this removes ANY character that is followed by " ," — confirm
        # whether a literal period was intended.
        s = re.sub(r". ,", "", s)
        s = s.replace("..", ".")
        s = s.replace(". .", ".")
        s = s.replace("\n", "")
        s = s.strip()
        return s

    def sim_text(input_text):
        """Return the PDF passage most similar to ``input_text`` ('' if none)."""
        # One embeddings client serves every PDF and the re-ranking step.
        embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

        # NOTE(review): each call embeds every PDF again to build a fresh
        # FAISS index; consider building the indexes once at start-up.
        passages = []
        for path in glob.glob('/pdf/*.pdf'):
            pages = PyPDFLoader(path).load_and_split()
            if not pages:
                # Nothing to index in this file.
                continue
            faiss_index = FAISS.from_documents(pages, embeddings)
            passages.extend(
                doc.page_content
                for doc in faiss_index.similarity_search(input_text, k=5)
            )
        if not passages:
            return ""

        df = pd.DataFrame(passages, columns=['text'])
        df['text'] = df['text'].apply(normalize_text)

        tokenizer = tiktoken.get_encoding("cl100k_base")
        df['n_tokens'] = df['text'].apply(lambda x: len(tokenizer.encode(x)))
        # .copy() so the column assignments below write to an independent
        # frame instead of a filtered slice of the original.
        df = df[df.n_tokens < 8192].copy()

        df['ada_v2'] = df['text'].apply(embeddings.embed_query)

        embedding = get_embedding(
            input_text,
            engine="text-embedding-ada-002",  # deployment name of the text-embedding-ada-002 (Version 2) model
        )
        df['similarities'] = df.ada_v2.apply(lambda x: cosine_similarity(x, embedding))

        # Only the single best-scoring passage is handed back to the caller.
        best = df.sort_values('similarities', ascending=False).head(1)
        return " \n ".join(best.text.values)

    start_time = time.time()
    persona = """
    You are the good advice investor chatbot teach people to understanding the basics,
    risk management strategies, and methods for portfolio diversification.
    Be an AI-guided education on these topics, along with some practical tips and advice for getting started in both stock market investing in Bursa Malaysia
    """
    history_openai_format = [{"role": "system", "content": persona}]
    # Slicing an empty history yields nothing, so no length check is needed.
    for human, assistant in history[-1:]:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    context = sim_text(message)
    if context:
        # Skip the context turn entirely when no passage was retrieved,
        # rather than sending an empty assistant message.
        history_openai_format.append({"role": "assistant", "content": context})
    history_openai_format.append({"role": "user", "content": message})
    # Elapsed time is end minus start, so the printed value is positive.
    execution_time = time.time() - start_time
    print("history Execution time: ", execution_time)

    start_time = time.time()
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo-0125',
        messages=history_openai_format,
        temperature=0.1,
        stream=True,
    )
    execution_time = time.time() - start_time
    print("response Execution time: ", execution_time)

    partial_message = ""
    for chunk in response:
        try:
            chunk_message = chunk['choices'][0]['delta']['content']  # extract the message
        except (KeyError, IndexError):
            # Chunks without a text delta carry nothing to display.
            continue
        if chunk_message is None:
            continue
        partial_message += chunk_message
        # The yield sits outside the try block so that closing the generator
        # is never intercepted by the exception handler.
        yield partial_message

    image_path = input_to_image(message)

    if image_path != 'None':
        try:
            base64_image = image_to_base64(image_path)
        except OSError as err:
            # The answer has already been streamed; a missing or unreadable
            # image should not turn it into an error.
            print(f"Could not load image {image_path}: {err}")
        else:
            partial_message += f"<br><br><img src='{base64_image}' height='20vh'>"
            print('Show image!')
    yield partial_message

# Sample questions passed to the chat interface as its `examples`.
examples = [
    "How can I start investing in the Bursa Malaysia as a beginner?",
    "What are some popular investment options available in Malaysia?",
    "What are the key factors to consider before investing in a property in Malaysia",
]

# Wrap predict in a chat interface, enable the queue, and launch the app.
gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height=600),
    css=css,
    theme=theme,
    examples=examples,
    title='FundLearn Chatbot',
    description=description,
    retry_btn=None,
    undo_btn=None,
).queue().launch(share=True, debug=True)