MohammedAlakhras commited on
Commit
b902a61
·
1 Parent(s): 5d638fc

Upload 6 files

Browse files
Files changed (6) hide show
  1. HuggingChatAPI.py +65 -0
  2. exportchat.py +49 -0
  3. packages.txt +1 -0
  4. promptTemplate.py +90 -0
  5. requirements.txt +19 -0
  6. streamlit_app.py +1001 -0
HuggingChatAPI.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from hugchat import hugchat
3
+ from hugchat.login import Login
4
+ from langchain.llms.base import LLM
5
+ from typing import Optional, List, Mapping, Any
6
+ from time import sleep
7
+
8
+
9
+ # THIS IS A CUSTOM LLM WRAPPER Based on hugchat library
10
+ # Reference :
11
+ # - Langchain custom LLM wrapper : https://python.langchain.com/docs/modules/model_io/models/llms/how_to/custom_llm
12
+ # - HugChat library : https://github.com/Soulter/hugging-chat-api
13
+
14
class HuggingChat(LLM):
    """LangChain-compatible LLM wrapper around the hugchat ChatBot client.

    The client is created lazily on the first call: the wrapper logs in with
    the stored credentials, opens a fresh conversation, and reuses that
    conversation id on every subsequent call — this is what preserves chat
    memory across calls.
    """

    # Lazily-created hugchat client; stays None until the first _call().
    chatbot: Optional[hugchat.ChatBot] = None
    # Conversation id reused across calls ("" means not created yet).
    conversation: Optional[str] = ""
    # Hugging Face account credentials used for the lazy login.
    email: Optional[str]
    psw: Optional[str]

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain reports for this custom LLM."""
        return "custom"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send *prompt* to HuggingChat and return the model's reply.

        Raises:
            ValueError: if the client must be created but either credential
                is missing.
        """
        if stop is not None:
            # Stop sequences are not supported by the hugchat backend.
            pass

        if self.chatbot is None:
            # BUGFIX: the original constructed a ValueError without raising it,
            # required BOTH credentials to be missing (`and`) before even
            # reaching the error, and — when `conversation` was non-empty —
            # called change_conversation() on a None chatbot. Require both
            # credentials and always build the client here.
            if self.email is None or self.psw is None:
                raise ValueError("Email and Password is required, pls check the documentation on github : https://github.com/Soulter/hugging-chat-api")

            sign = Login(self.email, self.psw)  # type: ignore
            cookies = sign.login()

            # Create a ChatBot bound to the authenticated session.
            self.chatbot = hugchat.ChatBot(cookies=cookies.get_dict())

            # `conv_id` instead of `id` — avoid shadowing the builtin.
            conv_id = self.chatbot.new_conversation()
            self.chatbot.change_conversation(conv_id)
            self.conversation = conv_id
        else:
            # Re-select the stored conversation so context is preserved.
            self.chatbot.change_conversation(self.conversation)  # type: ignore

        data = self.chatbot.chat(prompt, temperature=0.4, stream=False)  # type: ignore
        return data  # type: ignore

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"model": "HuggingCHAT"}
56
+
57
+
58
+
59
+ #llm = HuggingChat(email = "YOUR-EMAIL" , psw = "YOUR-PSW" ) #for start new chat
60
+
61
+
62
+ #print(llm("Hello, how are you?"))
63
+ #print(llm("what is AI?"))
64
+ #print(llm("Can you resume your previus answer?")) #now memory work well
65
+
exportchat.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PYTHON FILE FOR EXPORT CHAT FUNCTION
3
+ """
4
+
5
+ import streamlit as st
6
+ from datetime import datetime
7
+
8
+
9
def export_chat():
    """Build an HTML export of the current chat and offer it as a download.

    The chat lives in ``st.session_state['generated']`` (bot replies) and
    ``st.session_state['past']`` (user messages); messages are written in
    reverse order so the first message shown is always the bot greeting.
    """
    # Guard on BOTH keys: 'past' is indexed below, so checking only
    # 'generated' (as before) could raise KeyError on a half-built state.
    if 'generated' in st.session_state and 'past' in st.session_state:
        # Collect fragments in a list and join once (avoids quadratic +=).
        parts = []
        parts.append('<html><head><title>ChatBOT Intelligenza Artificiale Italia 🧠🤖🇮🇹</title>')
        # Two simple CSS boxes, WhatsApp-style: bot on the left, user on the right.
        parts.append('<style> .bot { background-color: #e5e5ea; padding: 10px; border-radius: 10px; margin: 10px; width: 50%; float: left; } .user { background-color: #dcf8c6; padding: 10px; border-radius: 10px; margin: 10px; width: 50%; float: right; } </style>')
        parts.append('</head><body>')
        # Page header.
        parts.append('<center><h1>ChatBOT Intelligenza Artificiale Italia 🧠🤖🇮🇹</h1>')
        # Donation link.
        parts.append('<h3>🤗 Support the project with a donation for the development of new features 🤗</h3>')
        parts.append('<br><a href="https://rebrand.ly/SupportAUTOGPTfree"><img src="https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif" alt="PayPal donate button" /></a>')
        # Subheader with the export date and time.
        parts.append('<br><br><h5>' + datetime.now().strftime("%d/%m/%Y %H:%M:%S") + '</h5></center><br><br>')
        # Chat container.
        parts.append('<div style="padding: 10px; border-radius: 10px; margin: 10px; width: 100%; float: left;">')
        for i in range(len(st.session_state['generated']) - 1, -1, -1):
            parts.append('<div class="bot">' + st.session_state["generated"][i] + '</div><br>')
            parts.append('<div class="user">' + st.session_state['past'][i] + '</div><br>')
        parts.append('</div>')
        # Footer with donation link.
        parts.append('<br><br><center><small>Thanks you for using our ChatBOT 🧠🤖🇮🇹</small>')
        parts.append('<h6>🤗 Support the project with a donation for the development of new features 🤗</h6>')
        parts.append('<br><a href="https://rebrand.ly/SupportAUTOGPTfree"><img src="https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif" alt="PayPal donate button" /></a><center>')
        parts.append('</body></html>')
        html_chat = ''.join(parts)

        # BUGFIX: force UTF-8 — the page contains emoji, and the platform
        # default encoding (e.g. cp1252 on Windows) would raise
        # UnicodeEncodeError on write.
        with open('chat.html', 'w', encoding='utf-8') as f:
            f.write(html_chat)
        # Offer the same HTML string for download.
        st.download_button(
            label="📚 Download chat",
            data=html_chat,
            file_name='chat.html',
            mime='text/html'
        )
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
promptTemplate.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This file contains the template for the prompt to be used for injecting the context into the model.
3
+
4
+ With this technique we can use different plugin for different type of question and answer.
5
+ Like :
6
+ - Internet
7
+ - Data
8
+ - Code
9
+ - PDF
10
+ - Audio
11
+ - Video
12
+
13
+ """
14
+
15
+ from datetime import datetime
16
+ now = datetime.now()
17
+
18
def prompt4conversation(prompt, context):
    """Build the base conversation prompt with context injection.

    Args:
        prompt: the user's current message.
        context: the previous exchange, injected as conversational memory.

    Returns:
        The fully formatted prompt string.
    """
    # BUGFIX: compute the timestamp at call time. The module-level `now`
    # is captured once at import, so "today is ..." went stale in a
    # long-running Streamlit process.
    now = datetime.now()
    final_prompt = f""" GENERAL INFORMATION : ( today is {now.strftime("%d/%m/%Y %H:%M:%S")} , You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE , WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt} .
    WRITE THE ANSWER :"""
    return final_prompt
25
+
26
def prompt4conversationInternet(prompt, context, internet, resume):
    """Build a conversation prompt enriched with web-search results.

    Args:
        prompt: the user's current message.
        context: the previous exchange, injected as conversational memory.
        internet: raw search results to ground the answer.
        resume: a summary of the search results.

    Returns:
        The fully formatted prompt string.
    """
    # BUGFIX: compute the timestamp at call time instead of reusing the
    # module-level `now` captured once at import (stale date in long runs).
    now = datetime.now()
    final_prompt = f""" GENERAL INFORMATION : ( today is {now.strftime("%d/%m/%Y %H:%M:%S")} , You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE , WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}.
    INTERNET RESULT TO USE TO ANSWER : ({internet})
    INTERNET RESUME : ({resume})
    NOW THE USER ASK : {prompt}.
    WRITE THE ANSWER BASED ON INTERNET INFORMATION :"""
    return final_prompt
36
+
37
def prompt4Data(prompt, context, solution):
    """Wrap a pre-computed data-analysis answer so the model elaborates on it
    without changing its content (used by the "Talk with your DATA" plugin).
    """
    # The already-correct answer is injected verbatim; the model is only
    # asked to argue it more fully.
    return f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE , YOU MUST MAKE THE CORRECT ANSWER MORE ARGUMENTED ! IF THE CORRECT ANSWER CONTAINS CODE YOU ARE OBLIGED TO INSERT IT IN YOUR NEW ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CORRECT ANSWER : ({solution})
    MAKE THE ANSWER MORE ARGUMENTED, WITHOUT CHANGING ANYTHING OF THE CORRECT ANSWER :"""
45
+
46
def prompt4Code(prompt, context, solution):
    """Wrap a pre-computed code answer so the model expands the explanation
    while keeping the code itself untouched (used by the code plugin).
    """
    # The code in `solution` must survive verbatim in the final answer.
    return f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE , THE CORRECT ANSWER CONTAINS CODE YOU ARE OBLIGED TO INSERT IT IN YOUR NEW ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CODE FOR THE ANSWER : ({solution})
    WITHOUT CHANGING ANYTHING OF THE CODE of CORRECT ANSWER , MAKE THE ANSWER MORE DETALIED INCLUDING THE CORRECT CODE :"""
54
+
55
+
56
def prompt4Context(prompt, context, solution):
    """Wrap a retrieval-based answer (e.g. from a vectorstore QA chain) so the
    model enriches it without altering its content.
    """
    return f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE ,WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CORRECT ANSWER : ({solution})
    WITHOUT CHANGING ANYTHING OF CORRECT ANSWER , MAKE THE ANSWER MORE DETALIED:"""
64
+
65
+
66
def prompt4Audio(prompt, context, solution):
    """Wrap an answer derived from transcribed audio so the model elaborates
    on it without altering its content (audio plugin).
    """
    return f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE ,WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CORRECT ANSWER based on Audio text gived in input : ({solution})
    WITHOUT CHANGING ANYTHING OF CORRECT ANSWER , MAKE THE ANSWER MORE DETALIED:"""
74
+
75
def prompt4YT(prompt, context, solution):
    """Wrap an answer derived from a YouTube transcript so the model
    elaborates on it without altering its content (YT plugin).
    """
    return f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE ,WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CORRECT ANSWER based on Youtube video gived in input : ({solution})
    WITHOUT CHANGING ANYTHING OF CORRECT ANSWER , MAKE THE ANSWER MORE DETALIED:"""
83
+
84
+
85
+ #HOW TO ADD YOUR OWN PROMPT :
86
+ # 1) ADD YOUR FUNCTION HERE, for example : def prompt4Me(prompt, context):
87
+ # 2) WRITE THE PROMPT TEMPLATE FOR YOUR FUNCTION, for example : template = f"YOU IS : {context} , NOW THE USER ASK : {prompt} . WRITE THE ANSWER :"
88
+ # 3) RETURN THE TEMPLATE, for example : return template
89
+ # 4) IMPORT YOUR FUNCTION IN THE MAIN FILE (streamlit_app.py) , for example : from promptTemplate import prompt4Me
90
+ # 5) FOLLOW THE OTHER STEPS IN THE MAIN FILE (streamlit_app.py)
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ beautifulsoup4==4.12.2
2
+ docx2txt==0.8
3
+ duckduckgo_search==3.8.3
4
+ hugchat==0.0.8
5
+ langchain==0.0.219
6
+ pandas==2.0.1
7
+ pdfplumber==0.9.0
8
+ pydub==0.25.1
9
+ requests
10
+ sketch==0.4.2
11
+ SpeechRecognition==3.8.1
12
+ streamlit==1.24.0
13
+ streamlit_extras==0.2.7
14
+ youtube_search_python==1.6.6
15
+ youtube_transcript_api==0.6.1
16
+ chromadb==0.3.26
17
+ ffmpeg-python
18
+ ffprobe
19
+ huggingface_hub
streamlit_app.py ADDED
@@ -0,0 +1,1001 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import random
3
+ import shutil
4
+ import string
5
+ from zipfile import ZipFile
6
+ import streamlit as st
7
+ from streamlit_extras.colored_header import colored_header
8
+ from streamlit_extras.add_vertical_space import add_vertical_space
9
+ from hugchat import hugchat
10
+ from hugchat.login import Login
11
+ import pandas as pd
12
+ import asyncio
13
+ loop = asyncio.new_event_loop()
14
+ asyncio.set_event_loop(loop)
15
+ import sketch
16
+ from langchain.text_splitter import CharacterTextSplitter
17
+ from promptTemplate import prompt4conversation, prompt4Data, prompt4Code, prompt4Context, prompt4Audio, prompt4YT
18
+ from promptTemplate import prompt4conversationInternet
19
+ # FOR DEVELOPMENT NEW PLUGIN
20
+ # from promptTemplate import yourPLUGIN
21
+ from exportchat import export_chat
22
+ from langchain.vectorstores import Chroma
23
+ from langchain.chains import RetrievalQA
24
+ from HuggingChatAPI import HuggingChat
25
+ from langchain.embeddings import HuggingFaceHubEmbeddings
26
+ from youtube_transcript_api import YouTubeTranscriptApi
27
+ import requests
28
+ from bs4 import BeautifulSoup
29
+ import speech_recognition as sr
30
+ import pdfplumber
31
+ import docx2txt
32
+ from duckduckgo_search import DDGS
33
+ from itertools import islice
34
+ from os import path
35
+ from pydub import AudioSegment
36
+ import os
37
+
38
+
39
# Embedding client used by the vectorstore plugins; built once per session.
hf = None
repo_id = "sentence-transformers/all-mpnet-base-v2"

# Create the HuggingFace Hub embeddings only after the user has logged in
# (API token present) and only once (cached in session_state afterwards).
if 'hf_token' in st.session_state and 'hf' not in st.session_state:
    hf = HuggingFaceHubEmbeddings(
        repo_id=repo_id,
        task="feature-extraction",
        huggingfacehub_api_token=st.session_state['hf_token'],
    )  # type: ignore
    st.session_state['hf'] = hf
50
+
51
+
52
+
53
# Global page configuration — must run before any other Streamlit rendering.
st.set_page_config(
    page_title="Talk with evrythings💬",
    page_icon="🤗",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Stretch regular and download buttons to the full column width.
_full_width_css = '<style>.css-w770g5{\
width: 100%;}\
.css-b3z5c9{ \
width: 100%;}\
.stButton>button{\
width: 100%;}\
.stDownloadButton>button{\
width: 100%;}\
</style>'
st.markdown(_full_width_css, unsafe_allow_html=True)
66
+
67
+
68
+
69
+
70
+
71
+
72
+ # Sidebar contents for logIN, choose plugin, and export chat
73
+ with st.sidebar:
74
+ st.title('🤗💬 PersonalChat App')
75
+
76
+ if 'hf_email' not in st.session_state or 'hf_pass' not in st.session_state:
77
+ with st.expander("ℹ️ Login in Hugging Face", expanded=True):
78
+ st.write("⚠️ You need to login in Hugging Face to use this app. You can register [here](https://huggingface.co/join).")
79
+ st.header('Hugging Face Login')
80
+ hf_email = st.text_input('Enter E-mail:')
81
+ hf_pass = st.text_input('Enter password:', type='password')
82
+ hf_token = st.text_input('Enter API Token:', type='password')
83
+ if st.button('Login 🚀') and hf_email and hf_pass and hf_token:
84
+ with st.spinner('🚀 Logging in...'):
85
+ st.session_state['hf_email'] = hf_email
86
+ st.session_state['hf_pass'] = hf_pass
87
+ st.session_state['hf_token'] = hf_token
88
+
89
+ try:
90
+
91
+ sign = Login(st.session_state['hf_email'], st.session_state['hf_pass'])
92
+ cookies = sign.login()
93
+ chatbot = hugchat.ChatBot(cookies=cookies.get_dict())
94
+ except Exception as e:
95
+ st.error(e)
96
+ st.info("⚠️ Please check your credentials and try again.")
97
+ st.error("⚠️ dont abuse the API")
98
+ st.warning("⚠️ If you don't have an account, you can register [here](https://huggingface.co/join).")
99
+ from time import sleep
100
+ sleep(3)
101
+ del st.session_state['hf_email']
102
+ del st.session_state['hf_pass']
103
+ del st.session_state['hf_token']
104
+ st.experimental_rerun()
105
+
106
+ st.session_state['chatbot'] = chatbot
107
+
108
+ id = st.session_state['chatbot'].new_conversation()
109
+ st.session_state['chatbot'].change_conversation(id)
110
+
111
+ st.session_state['conversation'] = id
112
+ # Generate empty lists for generated and past.
113
+ ## generated stores AI generated responses
114
+ if 'generated' not in st.session_state:
115
+ st.session_state['generated'] = ["I'm **IA ITALIA chat**, How may I help you ? "]
116
+ ## past stores User's questions
117
+ if 'past' not in st.session_state:
118
+ st.session_state['past'] = ['Hi!']
119
+
120
+ st.session_state['LLM'] = HuggingChat(email=st.session_state['hf_email'], psw=st.session_state['hf_pass'])
121
+
122
+ st.experimental_rerun()
123
+
124
+
125
+ else:
126
+ with st.expander("ℹ️ Advanced Settings"):
127
+ #temperature: Optional[float]. Default is 0.5
128
+ #top_p: Optional[float]. Default is 0.95
129
+ #repetition_penalty: Optional[float]. Default is 1.2
130
+ #top_k: Optional[int]. Default is 50
131
+ #max_new_tokens: Optional[int]. Default is 1024
132
+
133
+ temperature = st.slider('🌡 Temperature', min_value=0.1, max_value=1.0, value=0.5, step=0.01)
134
+ top_p = st.slider('💡 Top P', min_value=0.1, max_value=1.0, value=0.95, step=0.01)
135
+ repetition_penalty = st.slider('🖌 Repetition Penalty', min_value=1.0, max_value=2.0, value=1.2, step=0.01)
136
+ top_k = st.slider('❄️ Top K', min_value=1, max_value=100, value=50, step=1)
137
+ max_new_tokens = st.slider('📝 Max New Tokens', min_value=1, max_value=1024, value=1024, step=1)
138
+
139
+
140
+ # FOR DEVELOPMENT NEW PLUGIN YOU MUST ADD IT HERE INTO THE LIST
141
+ # YOU NEED ADD THE NAME AT 144 LINE
142
+
143
+ #plugins for conversation
144
+ plugins = ["🛑 No PLUGIN","🌐 Web Search", "🔗 Talk with Website" , "📋 Talk with your DATA", "📝 Talk with your DOCUMENTS", "🎧 Talk with your AUDIO", "🎥 Talk with YT video", "🧠 GOD MODE" ,"💾 Upload saved VectorStore"]
145
+ if 'plugin' not in st.session_state:
146
+ st.session_state['plugin'] = st.selectbox('🔌 Plugins', plugins, index=0)
147
+ else:
148
+ if st.session_state['plugin'] == "🛑 No PLUGIN":
149
+ st.session_state['plugin'] = st.selectbox('🔌 Plugins', plugins, index=plugins.index(st.session_state['plugin']))
150
+
151
+
152
+ # FOR DEVELOPMENT NEW PLUGIN FOLLOW THIS TEMPLATE
153
+ # PLUGIN TEMPLATE
154
+ # if st.session_state['plugin'] == "🔌 PLUGIN NAME" and 'PLUGIN NAME' not in st.session_state:
155
+ # # PLUGIN SETTINGS
156
+ # with st.expander("🔌 PLUGIN NAME Settings", expanded=True):
157
+ # if 'PLUGIN NAME' not in st.session_state or st.session_state['PLUGIN NAME'] == False:
158
+ # # PLUGIN CODE
159
+ # st.session_state['PLUGIN NAME'] = True
160
+ # elif st.session_state['PLUGIN NAME'] == True:
161
+ # # PLUGIN CODE
162
+ # if st.button('🔌 Disable PLUGIN NAME'):
163
+ # st.session_state['plugin'] = "🛑 No PLUGIN"
164
+ # st.session_state['PLUGIN NAME'] = False
165
+ # del ALL SESSION STATE VARIABLES RELATED TO PLUGIN
166
+ # st.experimental_rerun()
167
+ # # PLUGIN UPLOADER
168
+ # if st.session_state['PLUGIN NAME'] == True:
169
+ # with st.expander("🔌 PLUGIN NAME Uploader", expanded=True):
170
+ # # PLUGIN UPLOADER CODE
171
+ # load file
172
+ # if load file and st.button('🔌 Upload PLUGIN NAME'):
173
+ # qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
174
+ # st.session_state['PLUGIN DB'] = qa
175
+ # st.experimental_rerun()
176
+ #
177
+
178
+
179
+
180
+ # WEB SEARCH PLUGIN
181
+ if st.session_state['plugin'] == "🌐 Web Search" and 'web_search' not in st.session_state:
182
+ # web search settings
183
+ with st.expander("🌐 Web Search Settings", expanded=True):
184
+ if 'web_search' not in st.session_state or st.session_state['web_search'] == False:
185
+ reg = ['us-en', 'uk-en', 'it-it']
186
+ sf = ['on', 'moderate', 'off']
187
+ tl = ['d', 'w', 'm', 'y']
188
+ if 'region' not in st.session_state:
189
+ st.session_state['region'] = st.selectbox('🗺 Region', reg, index=1)
190
+ else:
191
+ st.session_state['region'] = st.selectbox('🗺 Region', reg, index=reg.index(st.session_state['region']))
192
+ if 'safesearch' not in st.session_state:
193
+ st.session_state['safesearch'] = st.selectbox('🚨 Safe Search', sf, index=1)
194
+ else:
195
+ st.session_state['safesearch'] = st.selectbox('🚨 Safe Search', sf, index=sf.index(st.session_state['safesearch']))
196
+ if 'timelimit' not in st.session_state:
197
+ st.session_state['timelimit'] = st.selectbox('📅 Time Limit', tl, index=1)
198
+ else:
199
+ st.session_state['timelimit'] = st.selectbox('📅 Time Limit', tl, index=tl.index(st.session_state['timelimit']))
200
+ if 'max_results' not in st.session_state:
201
+ st.session_state['max_results'] = st.slider('📊 Max Results', min_value=1, max_value=5, value=2, step=1)
202
+ else:
203
+ st.session_state['max_results'] = st.slider('📊 Max Results', min_value=1, max_value=5, value=st.session_state['max_results'], step=1)
204
+ if st.button('🌐 Save change'):
205
+ st.session_state['web_search'] = "True"
206
+ st.experimental_rerun()
207
+
208
+ elif st.session_state['plugin'] == "🌐 Web Search" and st.session_state['web_search'] == 'True':
209
+ with st.expander("🌐 Web Search Settings", expanded=True):
210
+ st.write('🚀 Web Search is enabled')
211
+ st.write('🗺 Region: ', st.session_state['region'])
212
+ st.write('🚨 Safe Search: ', st.session_state['safesearch'])
213
+ st.write('📅 Time Limit: ', st.session_state['timelimit'])
214
+ if st.button('🌐🛑 Disable Web Search'):
215
+ del st.session_state['web_search']
216
+ del st.session_state['region']
217
+ del st.session_state['safesearch']
218
+ del st.session_state['timelimit']
219
+ del st.session_state['max_results']
220
+ del st.session_state['plugin']
221
+ st.experimental_rerun()
222
+
223
+ # GOD MODE PLUGIN
224
+ if st.session_state['plugin'] == "🧠 GOD MODE" and 'god_mode' not in st.session_state:
225
+ with st.expander("🧠 GOD MODE Settings", expanded=True):
226
+ if 'god_mode' not in st.session_state or st.session_state['god_mode'] == False:
227
+ topic = st.text_input('🔎 Topic', "Artificial Intelligence in Finance")
228
+ web_result = st.checkbox('🌐 Web Search', value=True, disabled=True)
229
+ yt_result = st.checkbox('🎥 YT Search', value=True, disabled=True)
230
+ website_result = st.checkbox('🔗 Website Search', value=True, disabled=True)
231
+ deep_of_search = st.slider('📊 Deep of Search', min_value=1, max_value=5, value=2, step=1)
232
+ if st.button('🧠✅ Give knowledge to the model'):
233
+ full_text = []
234
+ links = []
235
+ news = []
236
+ yt_ids = []
237
+ source = []
238
+ if web_result == True:
239
+ internet_result = ""
240
+ internet_answer = ""
241
+ with DDGS() as ddgs:
242
+ with st.spinner('🌐 Searching on the web...'):
243
+ ddgs_gen = ddgs.text(topic, region="us-en")
244
+ for r in islice(ddgs_gen, deep_of_search):
245
+ l = r['href']
246
+ source.append(l)
247
+ links.append(l)
248
+ internet_result += str(r) + "\n\n"
249
+
250
+ fast_answer = ddgs.news(topic)
251
+ for r in islice(fast_answer, deep_of_search):
252
+ internet_answer += str(r) + "\n\n"
253
+ l = r['url']
254
+ source.append(l)
255
+ news.append(r)
256
+
257
+
258
+ full_text.append(internet_result)
259
+ full_text.append(internet_answer)
260
+
261
+ if yt_result == True:
262
+ with st.spinner('🎥 Searching on YT...'):
263
+ from youtubesearchpython import VideosSearch
264
+ videosSearch = VideosSearch(topic, limit = deep_of_search)
265
+ yt_result = videosSearch.result()
266
+ for i in yt_result['result']: # type: ignore
267
+ duration = i['duration'] # type: ignore
268
+ duration = duration.split(':')
269
+ if len(duration) == 3:
270
+ #skip videos longer than 1 hour
271
+ if int(duration[0]) > 1:
272
+ continue
273
+ if len(duration) == 2:
274
+ #skip videos longer than 30 minutes
275
+ if int(duration[0]) > 30:
276
+ continue
277
+ yt_ids.append(i['id']) # type: ignore
278
+ source.append("https://www.youtube.com/watch?v="+i['id']) # type: ignore
279
+ full_text.append(i['title']) # type: ignore
280
+
281
+
282
+ if website_result == True:
283
+ for l in links:
284
+ try:
285
+ with st.spinner(f'👨‍💻 Scraping website : {l}'):
286
+ r = requests.get(l)
287
+ soup = BeautifulSoup(r.content, 'html.parser')
288
+ full_text.append(soup.get_text()+"\n\n")
289
+ except:
290
+ pass
291
+
292
+ for id in yt_ids:
293
+ try:
294
+ yt_video_txt= []
295
+ with st.spinner(f'👨‍💻 Scraping YT video : {id}'):
296
+ transcript_list = YouTubeTranscriptApi.list_transcripts(id)
297
+ transcript_en = None
298
+ last_language = ""
299
+ for transcript in transcript_list:
300
+ if transcript.language_code == 'en':
301
+ transcript_en = transcript
302
+ break
303
+ else:
304
+ last_language = transcript.language_code
305
+ if transcript_en is None:
306
+ transcript_en = transcript_list.find_transcript([last_language])
307
+ transcript_en = transcript_en.translate('en')
308
+
309
+ text = transcript_en.fetch()
310
+ yt_video_txt.append(text)
311
+
312
+ for i in range(len(yt_video_txt)):
313
+ for j in range(len(yt_video_txt[i])):
314
+ full_text.append(yt_video_txt[i][j]['text'])
315
+
316
+
317
+ except:
318
+ pass
319
+
320
+ with st.spinner('🧠 Building vectorstore with knowledge...'):
321
+ full_text = "\n".join(full_text)
322
+ st.session_state['god_text'] = [full_text]
323
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
324
+ texts = text_splitter.create_documents([full_text])
325
+ # Select embeddings
326
+ embeddings = st.session_state['hf']
327
+ # Create a vectorstore from documents
328
+ random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
329
+ db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)
330
+
331
+ with st.spinner('🔨 Saving vectorstore...'):
332
+ # save vectorstore
333
+ db.persist()
334
+ #create .zip file of directory to download
335
+ shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
336
+ # save in session state and download
337
+ st.session_state['db'] = "./chroma_db_" + random_str + ".zip"
338
+
339
+ with st.spinner('🔨 Creating QA chain...'):
340
+ # Create retriever interface
341
+ retriever = db.as_retriever()
342
+ # Create QA chain
343
+ qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
344
+ st.session_state['god_mode'] = qa
345
+ st.session_state['god_mode_source'] = source
346
+ st.session_state['god_mode_info'] = "🧠 GOD MODE have builded a vectorstore about **" + topic + f"**. The knowledge is based on\n- {len(news)} news🗞\n- {len(yt_ids)} YT videos📺\n- {len(links)} websites🌐 \n"
347
+
348
+ st.experimental_rerun()
349
+
350
+
351
+ if st.session_state['plugin'] == "🧠 GOD MODE" and 'god_mode' in st.session_state:
352
+ with st.expander("**✅ GOD MODE is enabled 🚀**", expanded=True):
353
+ st.markdown(st.session_state['god_mode_info'])
354
+ if 'db' in st.session_state:
355
+ # leave ./ from name for download
356
+ file_name = st.session_state['db'][2:]
357
+ st.download_button(
358
+ label="📩 Download vectorstore",
359
+ data=open(file_name, 'rb').read(),
360
+ file_name=file_name,
361
+ mime='application/zip'
362
+ )
363
+ if st.button('🧠🛑 Disable GOD MODE'):
364
+ del st.session_state['god_mode']
365
+ del st.session_state['db']
366
+ del st.session_state['god_text']
367
+ del st.session_state['god_mode_info']
368
+ del st.session_state['god_mode_source']
369
+ del st.session_state['plugin']
370
+ st.experimental_rerun()
371
+
372
+
373
+ # DATA PLUGIN
374
+ if st.session_state['plugin'] == "📋 Talk with your DATA" and 'df' not in st.session_state:
375
+ with st.expander("📋 Talk with your DATA", expanded= True):
376
+ upload_csv = st.file_uploader("Upload your CSV", type=['csv'])
377
+ if upload_csv is not None:
378
+ df = pd.read_csv(upload_csv)
379
+ st.session_state['df'] = df
380
+ st.experimental_rerun()
381
+ if st.session_state['plugin'] == "📋 Talk with your DATA":
382
+ if st.button('🛑📋 Remove DATA from context'):
383
+ if 'df' in st.session_state:
384
+ del st.session_state['df']
385
+ del st.session_state['plugin']
386
+ st.experimental_rerun()
387
+
388
+
389
+
390
+ # DOCUMENTS PLUGIN
391
+ if st.session_state['plugin'] == "📝 Talk with your DOCUMENTS" and 'documents' not in st.session_state:
392
+ with st.expander("📝 Talk with your DOCUMENT", expanded=True):
393
+ upload_pdf = st.file_uploader("Upload your DOCUMENT", type=['txt', 'pdf', 'docx'], accept_multiple_files=True)
394
+ if upload_pdf is not None and st.button('📝✅ Load Documents'):
395
+ documents = []
396
+ with st.spinner('🔨 Reading documents...'):
397
+ for upload_pdf in upload_pdf:
398
+ print(upload_pdf.type)
399
+ if upload_pdf.type == 'text/plain':
400
+ documents += [upload_pdf.read().decode()]
401
+ elif upload_pdf.type == 'application/pdf':
402
+ with pdfplumber.open(upload_pdf) as pdf:
403
+ documents += [page.extract_text() for page in pdf.pages]
404
+ elif upload_pdf.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
405
+ text = docx2txt.process(upload_pdf)
406
+ documents += [text]
407
+ st.session_state['documents'] = documents
408
+ # Split documents into chunks
409
+ with st.spinner('🔨 Creating vectorstore...'):
410
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
411
+ texts = text_splitter.create_documents(documents)
412
+ # Select embeddings
413
+ embeddings = st.session_state['hf']
414
+ # Create a vectorstore from documents
415
+ random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
416
+ db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)
417
+
418
+ with st.spinner('🔨 Saving vectorstore...'):
419
+ # save vectorstore
420
+ db.persist()
421
+ #create .zip file of directory to download
422
+ shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
423
+ # save in session state and download
424
+ st.session_state['db'] = "./chroma_db_" + random_str + ".zip"
425
+
426
+ with st.spinner('🔨 Creating QA chain...'):
427
+ # Create retriever interface
428
+ retriever = db.as_retriever()
429
+ # Create QA chain
430
+ qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
431
+ st.session_state['pdf'] = qa
432
+
433
+ st.experimental_rerun()
434
+
435
+ if st.session_state['plugin'] == "📝 Talk with your DOCUMENTS":
436
+ if 'db' in st.session_state:
437
+ # leave ./ from name for download
438
+ file_name = st.session_state['db'][2:]
439
+ st.download_button(
440
+ label="📩 Download vectorstore",
441
+ data=open(file_name, 'rb').read(),
442
+ file_name=file_name,
443
+ mime='application/zip'
444
+ )
445
+ if st.button('🛑📝 Remove PDF from context'):
446
+ if 'pdf' in st.session_state:
447
+ del st.session_state['db']
448
+ del st.session_state['pdf']
449
+ del st.session_state['documents']
450
+ del st.session_state['plugin']
451
+
452
+ st.experimental_rerun()
453
+
454
# ---- AUDIO PLUGIN: transcribe an uploaded file and index the text ----
if st.session_state['plugin'] == "🎧 Talk with your AUDIO" and 'audio' not in st.session_state:
    with st.expander("🎙 Talk with your AUDIO", expanded=True):
        f = st.file_uploader("Upload your AUDIO", type=['wav', 'mp3'])
        if f is not None:
            if f.type == 'audio/mpeg':
                # mp3 -> wav: speech_recognition's AudioFile cannot read mp3.
                with st.spinner('🔨 Converting mp3 to wav...'):
                    with open('audio.mp3', 'wb') as out:
                        out.write(f.read())
                    sound = AudioSegment.from_mp3("audio.mp3")
                    sound.export("audio.wav", format="wav")
                    file_name = 'audio.wav'
            else:
                with open(f.name, 'wb') as out:
                    out.write(f.read())
                file_name = f.name
                # FIX: removed the dead second f.read() — the stream was
                # already consumed above, so it only ever returned b"".

            r = sr.Recognizer()
            with st.spinner('🔨 Reading audio...'):
                with sr.AudioFile(file_name) as source:
                    # Load audio to memory, then speech -> text.
                    audio_data = r.record(source)
                    text = r.recognize_google(audio_data)
            data = [text]

            with st.spinner('🎙 Creating Vectorstore...'):
                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                # FIX: pass the one-element list, not the raw string —
                # create_documents iterates its argument, so a str would
                # become one "document" per character.
                texts = text_splitter.create_documents(data)
                embeddings = st.session_state['hf']
                random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)

            with st.spinner('🎙 Saving Vectorstore...'):
                db.persist()
                # Zip the persisted directory so the user can download it.
                shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
                st.session_state['db'] = "./chroma_db_" + random_str + ".zip"

            with st.spinner('🎙 Creating QA chain...'):
                retriever = db.as_retriever()
                qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                st.session_state['audio'] = qa
                st.session_state['audio_text'] = text
            st.experimental_rerun()
515
+
516
# ---- AUDIO plugin: download / remove controls ----
if st.session_state['plugin'] == "🎧 Talk with your AUDIO":
    if 'db' in st.session_state:
        # Drop the leading "./" so the download gets a clean file name.
        file_name = st.session_state['db'][2:]
        # FIX: context manager instead of open(...).read() (leaked handle).
        with open(file_name, 'rb') as fh:
            zip_bytes = fh.read()
        st.download_button(
            label="📩 Download vectorstore",
            data=zip_bytes,
            file_name=file_name,
            mime='application/zip'
        )
    if st.button('🛑🎙 Remove AUDIO from context'):
        if 'audio' in st.session_state:
            del st.session_state['db']
            del st.session_state['audio']
            del st.session_state['audio_text']
            del st.session_state['plugin']
            st.experimental_rerun()
533
+
534
+
535
# ---- YT PLUGIN: index the transcripts of up to three YouTube videos ----
if st.session_state['plugin'] == "🎥 Talk with YT video" and 'yt' not in st.session_state:
    with st.expander("🎥 Talk with YT video", expanded=True):
        yt_url = st.text_input("1.📺 Enter a YouTube URL")
        yt_url2 = st.text_input("2.📺 Enter a YouTube URL")
        yt_url3 = st.text_input("3.📺 Enter a YouTube URL")
        if yt_url is not None and st.button('🎥✅ Add YouTube video to context'):
            if yt_url != "":
                # Collect the video ids ("...watch?v=<id>") that were filled in.
                video_ids = [yt_url.split("=")[1]]
                if yt_url2 != "":
                    video_ids.append(yt_url2.split("=")[1])
                if yt_url3 != "":
                    video_ids.append(yt_url3.split("=")[1])

                text_yt = []
                text_list = []
                for i, vid in enumerate(video_ids):
                    with st.spinner(f'🎥 Extracting TEXT from YouTube video {str(i)} ...'):
                        # FIX: fetch the transcript of video i — the original
                        # queried the first URL on every iteration, so the
                        # 2nd and 3rd videos were never actually used.
                        transcript_list = YouTubeTranscriptApi.list_transcripts(vid)
                        # Prefer an English track; otherwise translate the last
                        # available track to English.
                        transcript_en = None
                        last_language = ""
                        for transcript in transcript_list:
                            if transcript.language_code == 'en':
                                transcript_en = transcript
                                break
                            else:
                                last_language = transcript.language_code
                        if transcript_en is None:
                            transcript_en = transcript_list.find_transcript([last_language])
                            transcript_en = transcript_en.translate('en')
                        text_yt.append(transcript_en.fetch())

                # Flatten the per-video caption segments into a list of strings.
                for segments in text_yt:
                    for seg in segments:
                        text_list.append(seg['text'])

                with st.spinner('🎥 Creating Vectorstore...'):
                    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                    texts = text_splitter.create_documents(text_list)
                    embeddings = st.session_state['hf']
                    random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                    db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)

                with st.spinner('🎥 Saving Vectorstore...'):
                    db.persist()
                    # Zip the persisted directory so the user can download it.
                    shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
                    st.session_state['db'] = "./chroma_db_" + random_str + ".zip"

                with st.spinner('🎥 Creating QA chain...'):
                    retriever = db.as_retriever()
                    qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                    st.session_state['yt'] = qa
                    st.session_state['yt_text'] = text_list
                st.experimental_rerun()
604
+
605
# ---- YT plugin: download / remove controls ----
if st.session_state['plugin'] == "🎥 Talk with YT video":
    if 'db' in st.session_state:
        # Drop the leading "./" so the download gets a clean file name.
        file_name = st.session_state['db'][2:]
        # FIX: context manager instead of open(...).read() (leaked handle).
        with open(file_name, 'rb') as fh:
            zip_bytes = fh.read()
        st.download_button(
            label="📩 Download vectorstore",
            data=zip_bytes,
            file_name=file_name,
            mime='application/zip'
        )
    if st.button('🛑🎥 Remove YT video from context'):
        if 'yt' in st.session_state:
            del st.session_state['db']
            del st.session_state['yt']
            del st.session_state['yt_text']
            del st.session_state['plugin']
            st.experimental_rerun()
623
+
624
# ---- WEBSITE PLUGIN: scrape up to 10 URLs and index their text ----
if st.session_state['plugin'] == "🔗 Talk with Website" and 'web_sites' not in st.session_state:
    with st.expander("🔗 Talk with Website", expanded=True):
        web_url = st.text_area("🔗 Enter a website URLs , one for each line")
        if web_url is not None and st.button('🔗✅ Add website to context'):
            if web_url != "":
                text = []
                with st.spinner('🔗 Extracting TEXT from Websites ...'):
                    # Cap at 10 websites.
                    for url in web_url.split("\n")[:10]:
                        url = url.strip()
                        if not url:
                            # ROBUSTNESS FIX: a trailing newline in the textarea
                            # used to produce requests.get(""), which raises.
                            continue
                        page = requests.get(url)
                        soup = BeautifulSoup(page.content, 'html.parser')
                        text.append(soup.get_text())

                with st.spinner('🔗 Creating Vectorstore...'):
                    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                    texts = text_splitter.create_documents(text)
                    embeddings = st.session_state['hf']
                    random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                    db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)

                with st.spinner('🔗 Saving Vectorstore...'):
                    db.persist()
                    # Zip the persisted directory so the user can download it.
                    shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
                    st.session_state['db'] = "./chroma_db_" + random_str + ".zip"

                with st.spinner('🔗 Creating QA chain...'):
                    retriever = db.as_retriever()
                    qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                    st.session_state['web_sites'] = qa
                    st.session_state['web_text'] = text
                st.experimental_rerun()
664
+
665
# ---- Website plugin: download / remove controls ----
if st.session_state['plugin'] == "🔗 Talk with Website":
    if 'db' in st.session_state:
        # Drop the leading "./" so the download gets a clean file name.
        file_name = st.session_state['db'][2:]
        # FIX: context manager instead of open(...).read() (leaked handle).
        with open(file_name, 'rb') as fh:
            zip_bytes = fh.read()
        st.download_button(
            label="📩 Download vectorstore",
            data=zip_bytes,
            file_name=file_name,
            mime='application/zip'
        )
    if st.button('🛑🔗 Remove Website from context'):
        if 'web_sites' in st.session_state:
            del st.session_state['db']
            del st.session_state['web_sites']
            del st.session_state['web_text']
            del st.session_state['plugin']
            st.experimental_rerun()
683
+
684
+
685
# ---- Upload a previously saved VectorStore (.zip) ----
if st.session_state['plugin'] == "💾 Upload saved VectorStore" and 'old_db' not in st.session_state:
    with st.expander("💾 Upload saved VectorStore", expanded=True):
        db_file = st.file_uploader("Upload a saved VectorStore", type=["zip"])
        if db_file is not None and st.button('✅💾 Add saved VectorStore to context'):
            if db_file != "":
                with st.spinner('💾 Extracting VectorStore...'):
                    # Unzip into a fresh randomly-named directory.
                    random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                    with ZipFile(db_file, 'r') as zipObj:
                        zipObj.extractall("chroma_db_" + random_str)
                    hf = st.session_state['hf']
                    # FIX: Chroma's first positional argument is
                    # collection_name, not a path — the extracted directory
                    # must be passed as persist_directory, otherwise the
                    # uploaded store is never actually loaded.
                    db = Chroma(persist_directory="chroma_db_" + random_str, embedding_function=hf)

                with st.spinner('💾 Creating QA chain...'):
                    retriever = db.as_retriever()
                    qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                    # Store the ready QA chain; the original wrote the
                    # directory path here first and then overwrote it anyway.
                    st.session_state['old_db'] = qa
                st.experimental_rerun()
709
+
710
# ---- Saved-VectorStore plugin: remove control ----
if st.session_state['plugin'] == "💾 Upload saved VectorStore":
    if st.button('🛑💾 Remove VectorStore from context'):
        if 'old_db' in st.session_state:
            # Drop the loaded store and the plugin selection, then rerun.
            del st.session_state['old_db']
            del st.session_state['plugin']
            st.experimental_rerun()
716
+
717
+
718
# ---- End of plugin section: logout, chat export, support links ----
add_vertical_space(4)
if 'hf_email' in st.session_state:
    if st.button('🗑 Logout'):
        # Wipe the entire session (credentials, chatbot, plugin state).
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        st.experimental_rerun()

export_chat()
add_vertical_space(5)
html_chat = '<center><h6>🤗 Support the project with a donation for the development of new features 🤗</h6>'
html_chat += '<br><a href="https://rebrand.ly/SupportAUTOGPTfree"><img src="https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif" alt="PayPal donate button" /></a><center><br>'
st.markdown(html_chat, unsafe_allow_html=True)
st.write('Made with ❤️ by [Alessandro CIciarelli](https://intelligenzaartificialeitalia.net)')
733
+
734
##### End of sidebar

# Layout of input/response containers for the main area.
input_container = st.container()
response_container = st.container()
data_view_container = st.container()
loading_container = st.container()

# User input box.
with input_container:
    input_text = st.chat_input("🧑‍💻 Write here 👇", key="input")

# Expanders showing whatever context the active plugin has loaded.
with data_view_container:
    if 'df' in st.session_state:
        with st.expander("🤖 View your **DATA**"):
            st.data_editor(st.session_state['df'], use_container_width=True)
    if 'pdf' in st.session_state:
        with st.expander("🤖 View your **DOCUMENTs**"):
            st.write(st.session_state['documents'])
    if 'audio' in st.session_state:
        with st.expander("🤖 View your **AUDIO**"):
            st.write(st.session_state['audio_text'])
    if 'yt' in st.session_state:
        with st.expander("🤖 View your **YT video**"):
            st.write(st.session_state['yt_text'])
    if 'web_text' in st.session_state:
        with st.expander("🤖 View the **Website content**"):
            st.write(st.session_state['web_text'])
    if 'old_db' in st.session_state:
        with st.expander("🗂 View your **saved VectorStore**"):
            st.success("📚 VectorStore loaded")
    if 'god_mode_source' in st.session_state:
        with st.expander("🌍 View source"):
            for s in st.session_state['god_mode_source']:
                st.markdown("- " + s)
773
+
774
# Response output
## Function for taking user prompt as input followed by producing AI generated responses
def generate_response(prompt):
    """Build the final prompt for the active plugin and return the bot reply.

    Reads the active plugin and the conversation history from
    ``st.session_state``. Returns the generated response string; when a
    retrieval plugin supplied source documents, a "✅Source:" section is
    appended to the reply.
    """

    def _qa_answer(qa_chain, context, build_prompt):
        """Run a RetrievalQA chain for *prompt* and package its answer.

        Returns (final_prompt, make_better, source): answers longer than
        110 words are returned verbatim with sources inlined and
        make_better=False; shorter answers are wrapped by *build_prompt*
        for the LLM to refine, with the sources returned separately.
        """
        result = qa_chain({"query": prompt})
        solution = result["result"]
        docs = result.get("source_documents") or []
        src = ""
        if docs:
            src = "\n\n✅Source:\n"
            for d in docs:
                src += "- " + str(d) + "\n"
        if len(solution.split()) > 110:
            return solution + src, False, ""
        return build_prompt(prompt, context, solution), True, src

    final_prompt = ""
    make_better = True
    source = ""

    with loading_container:
        # Context = the most recent exchange only (plugin branches).
        context = f"User: {st.session_state['past'][-1]}\nBot: {st.session_state['generated'][-1]}\n"
        plugin = st.session_state['plugin']

        if plugin == "📋 Talk with your DATA" and 'df' in st.session_state:
            # Code-related questions get a ```python``` how-to snippet;
            # everything else gets a plain data answer.
            if any(word in prompt for word in ('python', 'Python', 'code', 'Code')):
                with st.spinner('🚀 Using tool for python code...'):
                    solution = "\n```python\n"
                    solution += st.session_state['df'].sketch.howto(prompt, call_display=False)
                    solution += "\n```\n\n"
                    final_prompt = prompt4Code(prompt, context, solution)
            else:
                with st.spinner('🚀 Using tool to get information...'):
                    solution = st.session_state['df'].sketch.ask(prompt, call_display=False)
                    final_prompt = prompt4Data(prompt, context, solution)

        # DECOMPOSITION: the six retrieval plugins shared identical logic
        # and differed only in the session key and prompt template.
        elif plugin == "📝 Talk with your DOCUMENTS" and 'pdf' in st.session_state:
            with st.spinner('🚀 Using tool to get information...'):
                final_prompt, make_better, source = _qa_answer(st.session_state['pdf'], context, prompt4Context)

        elif plugin == "🧠 GOD MODE" and 'god_mode' in st.session_state:
            with st.spinner('🚀 Using tool to get information...'):
                final_prompt, make_better, source = _qa_answer(st.session_state['god_mode'], context, prompt4Context)

        elif plugin == "🔗 Talk with Website" and 'web_sites' in st.session_state:
            with st.spinner('🚀 Using tool to get information...'):
                final_prompt, make_better, source = _qa_answer(st.session_state['web_sites'], context, prompt4Context)

        elif plugin == "💾 Upload saved VectorStore" and 'old_db' in st.session_state:
            with st.spinner('🚀 Using tool to get information...'):
                final_prompt, make_better, source = _qa_answer(st.session_state['old_db'], context, prompt4Context)

        elif plugin == "🎧 Talk with your AUDIO" and 'audio' in st.session_state:
            with st.spinner('🚀 Using tool to get information...'):
                final_prompt, make_better, source = _qa_answer(st.session_state['audio'], context, prompt4Audio)

        elif plugin == "🎥 Talk with YT video" and 'yt' in st.session_state:
            with st.spinner('🚀 Using tool to get information...'):
                final_prompt, make_better, source = _qa_answer(st.session_state['yt'], context, prompt4YT)

        else:
            # Plain conversation: use up to the last two exchanges as context.
            if len(st.session_state['past']) > 1:
                context = (f"User: {st.session_state['past'][-2]}\nBot: {st.session_state['generated'][-2]}\n"
                           f"User: {st.session_state['past'][-1]}\nBot: {st.session_state['generated'][-1]}\n")
            if st.session_state.get('web_search') == "True":
                with st.spinner('🚀 Using internet to get information...'):
                    internet_result = ""
                    internet_answer = ""
                    with DDGS() as ddgs:
                        ddgs_gen = ddgs.text(prompt, region=st.session_state['region'], safesearch=st.session_state['safesearch'], timelimit=st.session_state['timelimit'])
                        for r in islice(ddgs_gen, st.session_state['max_results']):
                            internet_result += str(r) + "\n\n"
                        fast_answer = ddgs.answers(prompt)
                        for r in islice(fast_answer, 2):
                            internet_answer += str(r) + "\n\n"
                    final_prompt = prompt4conversationInternet(prompt, context, internet_result, internet_answer)
            else:
                final_prompt = prompt4conversation(prompt, context)

        if make_better:
            # Short/templated prompts are sent to the LLM for the final answer.
            with st.spinner('🚀 Generating response...'):
                print(final_prompt)
                response = st.session_state['chatbot'].chat(final_prompt, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty, top_k=top_k, max_new_tokens=max_new_tokens)
                response += source
        else:
            # Long retrieval answers are returned verbatim (sources inlined).
            print(final_prompt)
            response = final_prompt

    return response
967
+
968
## Conditional display of AI generated responses as a function of user provided prompts
with response_container:
    # Only generate when the user typed something and is logged in.
    if input_text and 'hf_email' in st.session_state and 'hf_pass' in st.session_state:
        response = generate_response(input_text)
        st.session_state.past.append(input_text)
        st.session_state.generated.append(response)

    # Replay the conversation in order: user message first, then bot reply.
    if 'generated' in st.session_state:
        if st.session_state['generated']:
            for i in range(len(st.session_state['generated'])):
                with st.chat_message(name="user"):
                    st.markdown(st.session_state['past'][i])

                with st.chat_message(name="assistant"):
                    # Split off an optional "✅Source:" section and show it
                    # in a collapsible expander under the message.
                    parts = st.session_state['generated'][i].split("✅Source:")
                    if len(parts) > 1:
                        st.markdown(parts[0])
                        with st.expander("📚 Source of message number " + str(i + 1)):
                            st.markdown(parts[1])
                    else:
                        st.markdown(st.session_state['generated'][i])

                st.markdown('', unsafe_allow_html=True)
996
+
997
+
998
+ else:
999
+ st.info("👋 Hey , we are very happy to see you here 🤗")
1000
+ st.info("👉 Please Login to continue, click on top left corner to login 🚀")
1001
+ st.error("👉 If you are not registered on Hugging Face, please register first and then login 🤗")