Spaces:
Paused
Paused
Commit
·
b902a61
1
Parent(s):
5d638fc
Upload 6 files
Browse files- HuggingChatAPI.py +65 -0
- exportchat.py +49 -0
- packages.txt +1 -0
- promptTemplate.py +90 -0
- requirements.txt +19 -0
- streamlit_app.py +1001 -0
HuggingChatAPI.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from hugchat import hugchat
from hugchat.login import Login
from langchain.llms.base import LLM
from typing import Optional, List, Mapping, Any
from time import sleep


# THIS IS A CUSTOM LLM WRAPPER based on the hugchat library.
# Reference :
# - Langchain custom LLM wrapper : https://python.langchain.com/docs/modules/model_io/models/llms/how_to/custom_llm
# - HugChat library : https://github.com/Soulter/hugging-chat-api

class HuggingChat(LLM):
    """HuggingChat LLM wrapper.

    Logs in to Hugging Face lazily on the first `_call` and keeps a single
    conversation open, so follow-up prompts retain conversational memory.
    """

    chatbot: Optional[hugchat.ChatBot] = None  # created lazily on first call
    conversation: Optional[str] = ""           # id of the active conversation
    email: Optional[str]                       # Hugging Face account e-mail
    psw: Optional[str]                         # Hugging Face account password

    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send *prompt* to HuggingChat and return the generated text.

        Raises:
            ValueError: when no chatbot exists yet and both credentials
                are missing.
        """
        if stop is not None:
            # Stop sequences are not supported by the hugchat backend.
            pass

        if self.chatbot is None:
            if self.email is None and self.psw is None:
                # BUG FIX: the original built this ValueError but never raised
                # it, so a missing-credential misconfiguration surfaced later
                # as an obscure AttributeError on `self.chatbot`.
                raise ValueError(
                    "Email and Password is required, pls check the documentation on github : https://github.com/Soulter/hugging-chat-api"
                )

            if self.conversation == "":
                sign = Login(self.email, self.psw)  # type: ignore
                cookies = sign.login()

                # Create a ChatBot
                self.chatbot = hugchat.ChatBot(cookies=cookies.get_dict())

                # `conv_id` instead of `id` — avoid shadowing the builtin.
                conv_id = self.chatbot.new_conversation()
                self.chatbot.change_conversation(conv_id)
                self.conversation = conv_id
            else:
                self.chatbot.change_conversation(self.conversation)  # type: ignore

        data = self.chatbot.chat(prompt, temperature=0.4, stream=False)  # type: ignore
        return data  # type: ignore

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"model": "HuggingCHAT"}


# llm = HuggingChat(email="YOUR-EMAIL", psw="YOUR-PSW")  # for start new chat

# print(llm("Hello, how are you?"))
# print(llm("what is AI?"))
# print(llm("Can you resume your previus answer?"))  # now memory works well
exportchat.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
PYTHON FILE FOR EXPORT CHAT FUNCTION
"""

import streamlit as st
from datetime import datetime


def export_chat():
    """Render the current chat as a styled HTML page and offer it for download.

    Messages are emitted newest-first (the session lists are appended in
    chronological order, so we walk them backwards). Does nothing when no
    chat has been generated yet.
    """
    if 'generated' not in st.session_state:
        return

    # Build the document in a list and join once — avoids the quadratic
    # behaviour of repeated string concatenation on long chats.
    parts = []
    parts.append('<html><head><title>ChatBOT Intelligenza Artificiale Italia 🧠🤖🇮🇹</title>')
    # Two simple CSS boxes for bot and user, WhatsApp-style.
    parts.append('<style> .bot { background-color: #e5e5ea; padding: 10px; border-radius: 10px; margin: 10px; width: 50%; float: left; } .user { background-color: #dcf8c6; padding: 10px; border-radius: 10px; margin: 10px; width: 50%; float: right; } </style>')
    parts.append('</head><body>')
    # Header.
    parts.append('<center><h1>ChatBOT Intelligenza Artificiale Italia 🧠🤖🇮🇹</h1>')
    # Donation link.
    parts.append('<h3>🤗 Support the project with a donation for the development of new features 🤗</h3>')
    parts.append('<br><a href="https://rebrand.ly/SupportAUTOGPTfree"><img src="https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif" alt="PayPal donate button" /></a>')
    # Subheader with export date and time.
    parts.append('<br><br><h5>' + datetime.now().strftime("%d/%m/%Y %H:%M:%S") + '</h5></center><br><br>')
    # Chat container.
    parts.append('<div style="padding: 10px; border-radius: 10px; margin: 10px; width: 100%; float: left;">')
    # Guard against the two lists being momentarily out of sync — the
    # original indexed `past` with `generated`'s indices unconditionally.
    count = min(len(st.session_state['generated']), len(st.session_state['past']))
    for i in range(count - 1, -1, -1):
        parts.append('<div class="bot">' + st.session_state["generated"][i] + '</div><br>')
        parts.append('<div class="user">' + st.session_state['past'][i] + '</div><br>')
    parts.append('</div>')
    # Footer with donation link.
    parts.append('<br><br><center><small>Thanks you for using our ChatBOT 🧠🤖🇮🇹</small>')
    parts.append('<h6>🤗 Support the project with a donation for the development of new features 🤗</h6>')
    parts.append('<br><a href="https://rebrand.ly/SupportAUTOGPTfree"><img src="https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif" alt="PayPal donate button" /></a><center>')
    parts.append('</body></html>')

    html_chat = ''.join(parts)

    # BUG FIX: encoding='utf-8' is required — the page contains emoji, and
    # the platform default codec (e.g. cp1252 on Windows) raises
    # UnicodeEncodeError on them.
    with open('chat.html', 'w', encoding='utf-8') as f:
        f.write(html_chat)
    # Offer the in-memory document for download.
    st.download_button(
        label="📚 Download chat",
        data=html_chat,
        file_name='chat.html',
        mime='text/html'
    )
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
ffmpeg
|
promptTemplate.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
This file contains the template for the prompt to be used for injecting the context into the model.

With this technique we can use different plugin for different type of question and answer.
Like :
- Internet
- Data
- Code
- PDF
- Audio
- Video
"""

from datetime import datetime

# Kept for backward compatibility with any code importing `now` directly.
# NOTE: this value is frozen at import time; the prompt builders below call
# datetime.now() themselves (see _current_time) so a long-running app always
# reports the real current date/time to the model.
now = datetime.now()


def _current_time() -> str:
    """Return the current date/time formatted for the prompt header."""
    return datetime.now().strftime("%d/%m/%Y %H:%M:%S")


def prompt4conversation(prompt, context):
    """Build the plain-conversation prompt (no plugin active)."""
    # BUG FIX: the original interpolated the module-level `now`, evaluated
    # once at import — every prompt in a long-running app carried a stale
    # timestamp. We re-read the clock on every call instead.
    final_prompt = f""" GENERAL INFORMATION : ( today is {_current_time()} , You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE , WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt} .
    WRITE THE ANSWER :"""
    return final_prompt

def prompt4conversationInternet(prompt, context, internet, resume):
    """Build the prompt that injects web-search results and their summary."""
    final_prompt = f""" GENERAL INFORMATION : ( today is {_current_time()} , You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE , WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}.
    INTERNET RESULT TO USE TO ANSWER : ({internet})
    INTERNET RESUME : ({resume})
    NOW THE USER ASK : {prompt}.
    WRITE THE ANSWER BASED ON INTERNET INFORMATION :"""
    return final_prompt

def prompt4Data(prompt, context, solution):
    """Build the prompt that expands a data-analysis answer (`solution`)."""
    final_prompt = f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE , YOU MUST MAKE THE CORRECT ANSWER MORE ARGUMENTED ! IF THE CORRECT ANSWER CONTAINS CODE YOU ARE OBLIGED TO INSERT IT IN YOUR NEW ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CORRECT ANSWER : ({solution})
    MAKE THE ANSWER MORE ARGUMENTED, WITHOUT CHANGING ANYTHING OF THE CORRECT ANSWER :"""
    return final_prompt

def prompt4Code(prompt, context, solution):
    """Build the prompt that expands a code answer while preserving its code."""
    final_prompt = f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE , THE CORRECT ANSWER CONTAINS CODE YOU ARE OBLIGED TO INSERT IT IN YOUR NEW ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CODE FOR THE ANSWER : ({solution})
    WITHOUT CHANGING ANYTHING OF THE CODE of CORRECT ANSWER , MAKE THE ANSWER MORE DETALIED INCLUDING THE CORRECT CODE :"""
    return final_prompt


def prompt4Context(prompt, context, solution):
    """Build the prompt that expands a document/vectorstore retrieval answer."""
    final_prompt = f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE ,WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CORRECT ANSWER : ({solution})
    WITHOUT CHANGING ANYTHING OF CORRECT ANSWER , MAKE THE ANSWER MORE DETALIED:"""
    return final_prompt


def prompt4Audio(prompt, context, solution):
    """Build the prompt that expands an answer derived from audio transcription."""
    final_prompt = f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE ,WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CORRECT ANSWER based on Audio text gived in input : ({solution})
    WITHOUT CHANGING ANYTHING OF CORRECT ANSWER , MAKE THE ANSWER MORE DETALIED:"""
    return final_prompt

def prompt4YT(prompt, context, solution):
    """Build the prompt that expands an answer derived from a YouTube transcript."""
    final_prompt = f"""GENERAL INFORMATION : You is built by Alessandro Ciciarelli the owener of intelligenzaartificialeitalia.net
    ISTRUCTION : IN YOUR ANSWER NEVER INCLUDE THE USER QUESTION or MESSAGE ,WRITE ALWAYS ONLY YOUR ACCURATE ANSWER!
    PREVIUS MESSAGE : ({context})
    NOW THE USER ASK : {prompt}
    THIS IS THE CORRECT ANSWER based on Youtube video gived in input : ({solution})
    WITHOUT CHANGING ANYTHING OF CORRECT ANSWER , MAKE THE ANSWER MORE DETALIED:"""
    return final_prompt


#HOW TO ADD YOUR OWN PROMPT :
# 1) ADD YOUR FUNCTION HERE, for example : def prompt4Me(prompt, context):
# 2) WRITE THE PROMPT TEMPLATE FOR YOUR FUNCTION, for example : template = f"YOU IS : {context} , NOW THE USER ASK : {prompt} . WRITE THE ANSWER :"
# 3) RETURN THE TEMPLATE, for example : return template
# 4) IMPORT YOUR FUNCTION IN THE MAIN FILE (streamlit_app.py) , for example : from promptTemplate import prompt4Me
# 5) FOLLOW OTHER SPTEP IN THE MAIN FILE (streamlit_app.py)
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
beautifulsoup4==4.12.2
|
| 2 |
+
docx2txt==0.8
|
| 3 |
+
duckduckgo_search==3.8.3
|
| 4 |
+
hugchat==0.0.8
|
| 5 |
+
langchain==0.0.219
|
| 6 |
+
pandas==2.0.1
|
| 7 |
+
pdfplumber==0.9.0
|
| 8 |
+
pydub==0.25.1
|
| 9 |
+
requests
|
| 10 |
+
sketch==0.4.2
|
| 11 |
+
SpeechRecognition==3.8.1
|
| 12 |
+
streamlit==1.24.0
|
| 13 |
+
streamlit_extras==0.2.7
|
| 14 |
+
youtube_search_python==1.6.6
|
| 15 |
+
youtube_transcript_api==0.6.1
|
| 16 |
+
chromadb==0.3.26
|
| 17 |
+
ffmpeg-python
|
| 18 |
+
ffprobe
|
| 19 |
+
huggingface_hub
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,1001 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import random
|
| 3 |
+
import shutil
|
| 4 |
+
import string
|
| 5 |
+
from zipfile import ZipFile
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from streamlit_extras.colored_header import colored_header
|
| 8 |
+
from streamlit_extras.add_vertical_space import add_vertical_space
|
| 9 |
+
from hugchat import hugchat
|
| 10 |
+
from hugchat.login import Login
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import asyncio
|
| 13 |
+
loop = asyncio.new_event_loop()
|
| 14 |
+
asyncio.set_event_loop(loop)
|
| 15 |
+
import sketch
|
| 16 |
+
from langchain.text_splitter import CharacterTextSplitter
|
| 17 |
+
from promptTemplate import prompt4conversation, prompt4Data, prompt4Code, prompt4Context, prompt4Audio, prompt4YT
|
| 18 |
+
from promptTemplate import prompt4conversationInternet
|
| 19 |
+
# FOR DEVELOPMENT NEW PLUGIN
|
| 20 |
+
# from promptTemplate import yourPLUGIN
|
| 21 |
+
from exportchat import export_chat
|
| 22 |
+
from langchain.vectorstores import Chroma
|
| 23 |
+
from langchain.chains import RetrievalQA
|
| 24 |
+
from HuggingChatAPI import HuggingChat
|
| 25 |
+
from langchain.embeddings import HuggingFaceHubEmbeddings
|
| 26 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 27 |
+
import requests
|
| 28 |
+
from bs4 import BeautifulSoup
|
| 29 |
+
import speech_recognition as sr
|
| 30 |
+
import pdfplumber
|
| 31 |
+
import docx2txt
|
| 32 |
+
from duckduckgo_search import DDGS
|
| 33 |
+
from itertools import islice
|
| 34 |
+
from os import path
|
| 35 |
+
from pydub import AudioSegment
|
| 36 |
+
import os
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
hf = None
|
| 40 |
+
repo_id = "sentence-transformers/all-mpnet-base-v2"
|
| 41 |
+
|
| 42 |
+
if 'hf_token' in st.session_state:
|
| 43 |
+
if 'hf' not in st.session_state:
|
| 44 |
+
hf = HuggingFaceHubEmbeddings(
|
| 45 |
+
repo_id=repo_id,
|
| 46 |
+
task="feature-extraction",
|
| 47 |
+
huggingfacehub_api_token=st.session_state['hf_token'],
|
| 48 |
+
) # type: ignore
|
| 49 |
+
st.session_state['hf'] = hf
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
st.set_page_config(
|
| 54 |
+
page_title="Talk with evrythings💬", page_icon="🤗", layout="wide", initial_sidebar_state="expanded"
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
st.markdown('<style>.css-w770g5{\
|
| 58 |
+
width: 100%;}\
|
| 59 |
+
.css-b3z5c9{ \
|
| 60 |
+
width: 100%;}\
|
| 61 |
+
.stButton>button{\
|
| 62 |
+
width: 100%;}\
|
| 63 |
+
.stDownloadButton>button{\
|
| 64 |
+
width: 100%;}\
|
| 65 |
+
</style>', unsafe_allow_html=True)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# Sidebar contents for logIN, choose plugin, and export chat
|
| 73 |
+
with st.sidebar:
|
| 74 |
+
st.title('🤗💬 PersonalChat App')
|
| 75 |
+
|
| 76 |
+
if 'hf_email' not in st.session_state or 'hf_pass' not in st.session_state:
|
| 77 |
+
with st.expander("ℹ️ Login in Hugging Face", expanded=True):
|
| 78 |
+
st.write("⚠️ You need to login in Hugging Face to use this app. You can register [here](https://huggingface.co/join).")
|
| 79 |
+
st.header('Hugging Face Login')
|
| 80 |
+
hf_email = st.text_input('Enter E-mail:')
|
| 81 |
+
hf_pass = st.text_input('Enter password:', type='password')
|
| 82 |
+
hf_token = st.text_input('Enter API Token:', type='password')
|
| 83 |
+
if st.button('Login 🚀') and hf_email and hf_pass and hf_token:
|
| 84 |
+
with st.spinner('🚀 Logging in...'):
|
| 85 |
+
st.session_state['hf_email'] = hf_email
|
| 86 |
+
st.session_state['hf_pass'] = hf_pass
|
| 87 |
+
st.session_state['hf_token'] = hf_token
|
| 88 |
+
|
| 89 |
+
try:
|
| 90 |
+
|
| 91 |
+
sign = Login(st.session_state['hf_email'], st.session_state['hf_pass'])
|
| 92 |
+
cookies = sign.login()
|
| 93 |
+
chatbot = hugchat.ChatBot(cookies=cookies.get_dict())
|
| 94 |
+
except Exception as e:
|
| 95 |
+
st.error(e)
|
| 96 |
+
st.info("⚠️ Please check your credentials and try again.")
|
| 97 |
+
st.error("⚠️ dont abuse the API")
|
| 98 |
+
st.warning("⚠️ If you don't have an account, you can register [here](https://huggingface.co/join).")
|
| 99 |
+
from time import sleep
|
| 100 |
+
sleep(3)
|
| 101 |
+
del st.session_state['hf_email']
|
| 102 |
+
del st.session_state['hf_pass']
|
| 103 |
+
del st.session_state['hf_token']
|
| 104 |
+
st.experimental_rerun()
|
| 105 |
+
|
| 106 |
+
st.session_state['chatbot'] = chatbot
|
| 107 |
+
|
| 108 |
+
id = st.session_state['chatbot'].new_conversation()
|
| 109 |
+
st.session_state['chatbot'].change_conversation(id)
|
| 110 |
+
|
| 111 |
+
st.session_state['conversation'] = id
|
| 112 |
+
# Generate empty lists for generated and past.
|
| 113 |
+
## generated stores AI generated responses
|
| 114 |
+
if 'generated' not in st.session_state:
|
| 115 |
+
st.session_state['generated'] = ["I'm **IA ITALIA chat**, How may I help you ? "]
|
| 116 |
+
## past stores User's questions
|
| 117 |
+
if 'past' not in st.session_state:
|
| 118 |
+
st.session_state['past'] = ['Hi!']
|
| 119 |
+
|
| 120 |
+
st.session_state['LLM'] = HuggingChat(email=st.session_state['hf_email'], psw=st.session_state['hf_pass'])
|
| 121 |
+
|
| 122 |
+
st.experimental_rerun()
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
else:
|
| 126 |
+
with st.expander("ℹ️ Advanced Settings"):
|
| 127 |
+
#temperature: Optional[float]. Default is 0.5
|
| 128 |
+
#top_p: Optional[float]. Default is 0.95
|
| 129 |
+
#repetition_penalty: Optional[float]. Default is 1.2
|
| 130 |
+
#top_k: Optional[int]. Default is 50
|
| 131 |
+
#max_new_tokens: Optional[int]. Default is 1024
|
| 132 |
+
|
| 133 |
+
temperature = st.slider('🌡 Temperature', min_value=0.1, max_value=1.0, value=0.5, step=0.01)
|
| 134 |
+
top_p = st.slider('💡 Top P', min_value=0.1, max_value=1.0, value=0.95, step=0.01)
|
| 135 |
+
repetition_penalty = st.slider('🖌 Repetition Penalty', min_value=1.0, max_value=2.0, value=1.2, step=0.01)
|
| 136 |
+
top_k = st.slider('❄️ Top K', min_value=1, max_value=100, value=50, step=1)
|
| 137 |
+
max_new_tokens = st.slider('📝 Max New Tokens', min_value=1, max_value=1024, value=1024, step=1)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# FOR DEVELOPMENT NEW PLUGIN YOU MUST ADD IT HERE INTO THE LIST
|
| 141 |
+
# YOU NEED ADD THE NAME AT 144 LINE
|
| 142 |
+
|
| 143 |
+
#plugins for conversation
|
| 144 |
+
plugins = ["🛑 No PLUGIN","🌐 Web Search", "🔗 Talk with Website" , "📋 Talk with your DATA", "📝 Talk with your DOCUMENTS", "🎧 Talk with your AUDIO", "🎥 Talk with YT video", "🧠 GOD MODE" ,"💾 Upload saved VectorStore"]
|
| 145 |
+
if 'plugin' not in st.session_state:
|
| 146 |
+
st.session_state['plugin'] = st.selectbox('🔌 Plugins', plugins, index=0)
|
| 147 |
+
else:
|
| 148 |
+
if st.session_state['plugin'] == "🛑 No PLUGIN":
|
| 149 |
+
st.session_state['plugin'] = st.selectbox('🔌 Plugins', plugins, index=plugins.index(st.session_state['plugin']))
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
# FOR DEVELOPMENT NEW PLUGIN FOLLOW THIS TEMPLATE
|
| 153 |
+
# PLUGIN TEMPLATE
|
| 154 |
+
# if st.session_state['plugin'] == "🔌 PLUGIN NAME" and 'PLUGIN NAME' not in st.session_state:
|
| 155 |
+
# # PLUGIN SETTINGS
|
| 156 |
+
# with st.expander("🔌 PLUGIN NAME Settings", expanded=True):
|
| 157 |
+
# if 'PLUGIN NAME' not in st.session_state or st.session_state['PLUGIN NAME'] == False:
|
| 158 |
+
# # PLUGIN CODE
|
| 159 |
+
# st.session_state['PLUGIN NAME'] = True
|
| 160 |
+
# elif st.session_state['PLUGIN NAME'] == True:
|
| 161 |
+
# # PLUGIN CODE
|
| 162 |
+
# if st.button('🔌 Disable PLUGIN NAME'):
|
| 163 |
+
# st.session_state['plugin'] = "🛑 No PLUGIN"
|
| 164 |
+
# st.session_state['PLUGIN NAME'] = False
|
| 165 |
+
# del ALL SESSION STATE VARIABLES RELATED TO PLUGIN
|
| 166 |
+
# st.experimental_rerun()
|
| 167 |
+
# # PLUGIN UPLOADER
|
| 168 |
+
# if st.session_state['PLUGIN NAME'] == True:
|
| 169 |
+
# with st.expander("🔌 PLUGIN NAME Uploader", expanded=True):
|
| 170 |
+
# # PLUGIN UPLOADER CODE
|
| 171 |
+
# load file
|
| 172 |
+
# if load file and st.button('🔌 Upload PLUGIN NAME'):
|
| 173 |
+
# qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
|
| 174 |
+
# st.session_state['PLUGIN DB'] = qa
|
| 175 |
+
# st.experimental_rerun()
|
| 176 |
+
#
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
# WEB SEARCH PLUGIN
|
| 181 |
+
if st.session_state['plugin'] == "🌐 Web Search" and 'web_search' not in st.session_state:
|
| 182 |
+
# web search settings
|
| 183 |
+
with st.expander("🌐 Web Search Settings", expanded=True):
|
| 184 |
+
if 'web_search' not in st.session_state or st.session_state['web_search'] == False:
|
| 185 |
+
reg = ['us-en', 'uk-en', 'it-it']
|
| 186 |
+
sf = ['on', 'moderate', 'off']
|
| 187 |
+
tl = ['d', 'w', 'm', 'y']
|
| 188 |
+
if 'region' not in st.session_state:
|
| 189 |
+
st.session_state['region'] = st.selectbox('🗺 Region', reg, index=1)
|
| 190 |
+
else:
|
| 191 |
+
st.session_state['region'] = st.selectbox('🗺 Region', reg, index=reg.index(st.session_state['region']))
|
| 192 |
+
if 'safesearch' not in st.session_state:
|
| 193 |
+
st.session_state['safesearch'] = st.selectbox('🚨 Safe Search', sf, index=1)
|
| 194 |
+
else:
|
| 195 |
+
st.session_state['safesearch'] = st.selectbox('🚨 Safe Search', sf, index=sf.index(st.session_state['safesearch']))
|
| 196 |
+
if 'timelimit' not in st.session_state:
|
| 197 |
+
st.session_state['timelimit'] = st.selectbox('📅 Time Limit', tl, index=1)
|
| 198 |
+
else:
|
| 199 |
+
st.session_state['timelimit'] = st.selectbox('📅 Time Limit', tl, index=tl.index(st.session_state['timelimit']))
|
| 200 |
+
if 'max_results' not in st.session_state:
|
| 201 |
+
st.session_state['max_results'] = st.slider('📊 Max Results', min_value=1, max_value=5, value=2, step=1)
|
| 202 |
+
else:
|
| 203 |
+
st.session_state['max_results'] = st.slider('📊 Max Results', min_value=1, max_value=5, value=st.session_state['max_results'], step=1)
|
| 204 |
+
if st.button('🌐 Save change'):
|
| 205 |
+
st.session_state['web_search'] = "True"
|
| 206 |
+
st.experimental_rerun()
|
| 207 |
+
|
| 208 |
+
elif st.session_state['plugin'] == "🌐 Web Search" and st.session_state['web_search'] == 'True':
|
| 209 |
+
with st.expander("🌐 Web Search Settings", expanded=True):
|
| 210 |
+
st.write('🚀 Web Search is enabled')
|
| 211 |
+
st.write('🗺 Region: ', st.session_state['region'])
|
| 212 |
+
st.write('🚨 Safe Search: ', st.session_state['safesearch'])
|
| 213 |
+
st.write('📅 Time Limit: ', st.session_state['timelimit'])
|
| 214 |
+
if st.button('🌐🛑 Disable Web Search'):
|
| 215 |
+
del st.session_state['web_search']
|
| 216 |
+
del st.session_state['region']
|
| 217 |
+
del st.session_state['safesearch']
|
| 218 |
+
del st.session_state['timelimit']
|
| 219 |
+
del st.session_state['max_results']
|
| 220 |
+
del st.session_state['plugin']
|
| 221 |
+
st.experimental_rerun()
|
| 222 |
+
|
| 223 |
+
# GOD MODE PLUGIN
|
| 224 |
+
if st.session_state['plugin'] == "🧠 GOD MODE" and 'god_mode' not in st.session_state:
|
| 225 |
+
# GOD MODE PLUGIN — gather knowledge about a topic from web search, news and
# YouTube, build a Chroma vectorstore from the collected text and expose it
# through a RetrievalQA chain stored in st.session_state['god_mode'].
with st.expander("🧠 GOD MODE Settings", expanded=True):
    if 'god_mode' not in st.session_state or st.session_state['god_mode'] == False:
        topic = st.text_input('🔎 Topic', "Artificial Intelligence in Finance")
        # All three sources are always enabled (checkboxes are display-only).
        web_result = st.checkbox('🌐 Web Search', value=True, disabled=True)
        yt_result = st.checkbox('🎥 YT Search', value=True, disabled=True)
        website_result = st.checkbox('🔗 Website Search', value=True, disabled=True)
        # Number of results pulled from each source (1-5).
        deep_of_search = st.slider('📊 Deep of Search', min_value=1, max_value=5, value=2, step=1)
        if st.button('🧠✅ Give knowledge to the model'):
            full_text = []   # raw text snippets that will be embedded
            links = []       # website URLs discovered by the web search
            news = []        # news result dicts (counted in the summary line)
            yt_ids = []      # YouTube video ids to transcribe
            source = []      # every URL used, shown to the user as sources
            if web_result == True:
                internet_result = ""
                internet_answer = ""
                with DDGS() as ddgs:
                    with st.spinner('🌐 Searching on the web...'):
                        ddgs_gen = ddgs.text(topic, region="us-en")
                        for r in islice(ddgs_gen, deep_of_search):
                            l = r['href']
                            source.append(l)
                            links.append(l)
                            internet_result += str(r) + "\n\n"

                        fast_answer = ddgs.news(topic)
                        for r in islice(fast_answer, deep_of_search):
                            internet_answer += str(r) + "\n\n"
                            l = r['url']
                            source.append(l)
                            news.append(r)

                full_text.append(internet_result)
                full_text.append(internet_answer)

            if yt_result == True:
                with st.spinner('🎥 Searching on YT...'):
                    from youtubesearchpython import VideosSearch
                    videosSearch = VideosSearch(topic, limit = deep_of_search)
                    # NOTE(review): this rebinds the `yt_result` checkbox flag
                    # to the search payload — it works, but shadows the flag.
                    yt_result = videosSearch.result()
                    for i in yt_result['result']: # type: ignore
                        duration = i['duration'] # type: ignore
                        duration = duration.split(':')
                        if len(duration) == 3:
                            # skip videos longer than 1 hour
                            if int(duration[0]) > 1:
                                continue
                        if len(duration) == 2:
                            # skip videos longer than 30 minutes
                            if int(duration[0]) > 30:
                                continue
                        yt_ids.append(i['id']) # type: ignore
                        source.append("https://www.youtube.com/watch?v="+i['id']) # type: ignore
                        full_text.append(i['title']) # type: ignore

            if website_result == True:
                for l in links:
                    try:
                        with st.spinner(f'👨💻 Scraping website : {l}'):
                            r = requests.get(l)
                            soup = BeautifulSoup(r.content, 'html.parser')
                            full_text.append(soup.get_text()+"\n\n")
                    except:
                        # best-effort scraping: unreachable sites are skipped
                        pass

            for id in yt_ids:
                try:
                    yt_video_txt= []
                    with st.spinner(f'👨💻 Scraping YT video : {id}'):
                        transcript_list = YouTubeTranscriptApi.list_transcripts(id)
                        transcript_en = None
                        last_language = ""
                        # Prefer an English transcript; otherwise translate the
                        # last transcript seen into English.
                        for transcript in transcript_list:
                            if transcript.language_code == 'en':
                                transcript_en = transcript
                                break
                            else:
                                last_language = transcript.language_code
                        if transcript_en is None:
                            transcript_en = transcript_list.find_transcript([last_language])
                            transcript_en = transcript_en.translate('en')

                        text = transcript_en.fetch()
                        yt_video_txt.append(text)

                    # Flatten the list of transcript segments into plain lines.
                    for i in range(len(yt_video_txt)):
                        for j in range(len(yt_video_txt[i])):
                            full_text.append(yt_video_txt[i][j]['text'])

                except:
                    # best-effort: videos without transcripts are skipped
                    pass

            with st.spinner('🧠 Building vectorstore with knowledge...'):
                full_text = "\n".join(full_text)
                st.session_state['god_text'] = [full_text]
                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                texts = text_splitter.create_documents([full_text])
                # Select embeddings
                embeddings = st.session_state['hf']
                # Create a vectorstore from documents
                random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)

            with st.spinner('🔨 Saving vectorstore...'):
                # save vectorstore
                db.persist()
                # create .zip file of directory to download
                shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
                # save in session state and download
                st.session_state['db'] = "./chroma_db_" + random_str + ".zip"

            with st.spinner('🔨 Creating QA chain...'):
                # Create retriever interface
                retriever = db.as_retriever()
                # Create QA chain
                qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                st.session_state['god_mode'] = qa
                st.session_state['god_mode_source'] = source
                st.session_state['god_mode_info'] = "🧠 GOD MODE have builded a vectorstore about **" + topic + f"**. The knowledge is based on\n- {len(news)} news🗞\n- {len(yt_ids)} YT videos📺\n- {len(links)} websites🌐 \n"

            st.experimental_rerun()
# Status panel shown while GOD MODE is active: knowledge summary, vectorstore
# download and a disable button that clears every GOD MODE artefact.
if st.session_state['plugin'] == "🧠 GOD MODE" and 'god_mode' in st.session_state:
    with st.expander("**✅ GOD MODE is enabled 🚀**", expanded=True):
        st.markdown(st.session_state['god_mode_info'])
        if 'db' in st.session_state:
            # Drop the leading "./" so the browser gets a clean file name.
            file_name = st.session_state['db'][2:]
            # BUG FIX: read via a context manager — the original
            # open(file_name, 'rb').read() leaked the file handle on every rerun.
            with open(file_name, 'rb') as archive:
                archive_bytes = archive.read()
            st.download_button(
                label="📩 Download vectorstore",
                data=archive_bytes,
                file_name=file_name,
                mime='application/zip'
            )
        if st.button('🧠🛑 Disable GOD MODE'):
            # Remove every GOD MODE key from the session and restart the app.
            for key in ('god_mode', 'db', 'god_text', 'god_mode_info',
                        'god_mode_source', 'plugin'):
                del st.session_state[key]
            st.experimental_rerun()
# DATA PLUGIN — upload a CSV once, keep the DataFrame in the session, and
# offer a button to drop it again.
if st.session_state['plugin'] == "📋 Talk with your DATA" and 'df' not in st.session_state:
    with st.expander("📋 Talk with your DATA", expanded=True):
        uploaded = st.file_uploader("Upload your CSV", type=['csv'])
        if uploaded is not None:
            st.session_state['df'] = pd.read_csv(uploaded)
            st.experimental_rerun()

if st.session_state['plugin'] == "📋 Talk with your DATA":
    if st.button('🛑📋 Remove DATA from context'):
        if 'df' in st.session_state:
            del st.session_state['df']
            del st.session_state['plugin']
            st.experimental_rerun()
# DOCUMENTS PLUGIN — extract text from uploaded txt/pdf/docx files, build a
# Chroma vectorstore from the text and store a RetrievalQA chain in
# st.session_state['pdf'].
if st.session_state['plugin'] == "📝 Talk with your DOCUMENTS" and 'documents' not in st.session_state:
    with st.expander("📝 Talk with your DOCUMENT", expanded=True):
        upload_pdf = st.file_uploader("Upload your DOCUMENT", type=['txt', 'pdf', 'docx'], accept_multiple_files=True)
        if upload_pdf is not None and st.button('📝✅ Load Documents'):
            documents = []
            with st.spinner('🔨 Reading documents...'):
                # BUG FIX: the original iterated `for upload_pdf in upload_pdf`,
                # shadowing the uploaded-files list with the current file.
                for doc_file in upload_pdf:
                    print(doc_file.type)
                    if doc_file.type == 'text/plain':
                        documents += [doc_file.read().decode()]
                    elif doc_file.type == 'application/pdf':
                        with pdfplumber.open(doc_file) as pdf:
                            documents += [page.extract_text() for page in pdf.pages]
                    elif doc_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                        text = docx2txt.process(doc_file)
                        documents += [text]
                st.session_state['documents'] = documents
            # Split documents into chunks and embed them.
            with st.spinner('🔨 Creating vectorstore...'):
                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                texts = text_splitter.create_documents(documents)
                embeddings = st.session_state['hf']
                random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)

            with st.spinner('🔨 Saving vectorstore...'):
                db.persist()
                # zip the persisted directory so the user can download it later
                shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
                st.session_state['db'] = "./chroma_db_" + random_str + ".zip"

            with st.spinner('🔨 Creating QA chain...'):
                retriever = db.as_retriever()
                qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                st.session_state['pdf'] = qa

            st.experimental_rerun()
# DOCUMENTS status panel: vectorstore download plus a button to clear the
# document context from the session.
if st.session_state['plugin'] == "📝 Talk with your DOCUMENTS":
    if 'db' in st.session_state:
        # Drop the leading "./" so the browser gets a clean file name.
        file_name = st.session_state['db'][2:]
        # BUG FIX: read via a context manager — the original
        # open(file_name, 'rb').read() leaked the file handle on every rerun.
        with open(file_name, 'rb') as archive:
            archive_bytes = archive.read()
        st.download_button(
            label="📩 Download vectorstore",
            data=archive_bytes,
            file_name=file_name,
            mime='application/zip'
        )
    if st.button('🛑📝 Remove PDF from context'):
        if 'pdf' in st.session_state:
            del st.session_state['db']
            del st.session_state['pdf']
            del st.session_state['documents']
            del st.session_state['plugin']

            st.experimental_rerun()
# AUDIO PLUGIN — transcribe an uploaded wav/mp3 with Google speech recognition,
# embed the transcript in a Chroma vectorstore and store a RetrievalQA chain
# in st.session_state['audio'].
if st.session_state['plugin'] == "🎧 Talk with your AUDIO" and 'audio' not in st.session_state:
    with st.expander("🎙 Talk with your AUDIO", expanded=True):
        f = st.file_uploader("Upload your AUDIO", type=['wav', 'mp3'])
        if f is not None:
            if f.type == 'audio/mpeg':
                # speech_recognition needs wav, so convert mp3 first
                with st.spinner('🔨 Converting mp3 to wav...'):
                    with open('audio.mp3', 'wb') as out:
                        out.write(f.read())
                    sound = AudioSegment.from_mp3("audio.mp3")
                    sound.export("audio.wav", format="wav")
                    file_name = 'audio.wav'
            else:
                with open(f.name, 'wb') as out:
                    out.write(f.read())
                file_name = f.name
                # NOTE: removed the original dead `bytes_data = f.read()` —
                # the stream was already exhausted, it always returned b''.

            r = sr.Recognizer()
            # Given audio file must be a filename string or a file-like object
            with st.spinner('🔨 Reading audio...'):
                with sr.AudioFile(file_name) as source:
                    # listen for the data (load audio to memory)
                    audio_data = r.record(source)
                    # recognize (convert from speech to text)
                    text = r.recognize_google(audio_data)
                    data = [text]

            with st.spinner('🎙 Creating Vectorstore...'):
                text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                # BUG FIX: create_documents expects a list of strings; the
                # original passed the raw transcript string, which produced
                # one document per character.
                texts = text_splitter.create_documents(data)

                embeddings = st.session_state['hf']
                random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)

            with st.spinner('🎙 Saving Vectorstore...'):
                db.persist()
                # zip the persisted directory so the user can download it later
                shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
                st.session_state['db'] = "./chroma_db_" + random_str + ".zip"

            with st.spinner('🎙 Creating QA chain...'):
                retriever = db.as_retriever()
                qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                st.session_state['audio'] = qa
                st.session_state['audio_text'] = text
            st.experimental_rerun()
# AUDIO status panel: vectorstore download plus a button to clear the audio
# context from the session.
if st.session_state['plugin'] == "🎧 Talk with your AUDIO":
    if 'db' in st.session_state:
        # Drop the leading "./" so the browser gets a clean file name.
        file_name = st.session_state['db'][2:]
        # BUG FIX: read via a context manager — the original
        # open(file_name, 'rb').read() leaked the file handle on every rerun.
        with open(file_name, 'rb') as archive:
            archive_bytes = archive.read()
        st.download_button(
            label="📩 Download vectorstore",
            data=archive_bytes,
            file_name=file_name,
            mime='application/zip'
        )
    if st.button('🛑🎙 Remove AUDIO from context'):
        if 'audio' in st.session_state:
            del st.session_state['db']
            del st.session_state['audio']
            del st.session_state['audio_text']
            del st.session_state['plugin']
            st.experimental_rerun()
# YT PLUGIN — fetch (or translate) English transcripts for up to three
# YouTube videos, embed them in a Chroma vectorstore and store a RetrievalQA
# chain in st.session_state['yt'].
if st.session_state['plugin'] == "🎥 Talk with YT video" and 'yt' not in st.session_state:
    with st.expander("🎥 Talk with YT video", expanded=True):
        yt_url = st.text_input("1.📺 Enter a YouTube URL")
        yt_url2 = st.text_input("2.📺 Enter a YouTube URL")
        yt_url3 = st.text_input("3.📺 Enter a YouTube URL")
        if yt_url is not None and st.button('🎥✅ Add YouTube video to context'):
            if yt_url != "":
                # BUG FIX: the original looped `for i in range(video)` but always
                # transcribed `yt_url`, so URLs 2 and 3 were never fetched.
                # Collect the video id (the part after "=") of every non-empty field.
                video_ids = [u.split("=")[1] for u in (yt_url, yt_url2, yt_url3) if u != ""]

                text_yt = []
                text_list = []
                for idx, vid in enumerate(video_ids):
                    with st.spinner(f'🎥 Extracting TEXT from YouTube video {str(idx)} ...'):
                        # Prefer an English transcript; otherwise translate the
                        # last transcript seen into English.
                        transcript_list = YouTubeTranscriptApi.list_transcripts(vid)
                        transcript_en = None
                        last_language = ""
                        for transcript in transcript_list:
                            if transcript.language_code == 'en':
                                transcript_en = transcript
                                break
                            else:
                                last_language = transcript.language_code
                        if transcript_en is None:
                            transcript_en = transcript_list.find_transcript([last_language])
                            transcript_en = transcript_en.translate('en')

                        text = transcript_en.fetch()
                        text_yt.append(text)

                # Flatten the transcript segments into plain text lines.
                for segments in text_yt:
                    for seg in segments:
                        text_list.append(seg['text'])

                with st.spinner('🎥 Creating Vectorstore...'):
                    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                    texts = text_splitter.create_documents(text_list)
                    embeddings = st.session_state['hf']
                    random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                    db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)

                with st.spinner('🎥 Saving Vectorstore...'):
                    db.persist()
                    # zip the persisted directory so the user can download it later
                    shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
                    st.session_state['db'] = "./chroma_db_" + random_str + ".zip"

                with st.spinner('🎥 Creating QA chain...'):
                    retriever = db.as_retriever()
                    qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                    st.session_state['yt'] = qa
                    st.session_state['yt_text'] = text_list
                    st.experimental_rerun()
# YT status panel: vectorstore download plus a button to clear the YT video
# context from the session.
if st.session_state['plugin'] == "🎥 Talk with YT video":
    if 'db' in st.session_state:
        # Drop the leading "./" so the browser gets a clean file name.
        file_name = st.session_state['db'][2:]
        # BUG FIX: read via a context manager — the original
        # open(file_name, 'rb').read() leaked the file handle on every rerun.
        with open(file_name, 'rb') as archive:
            archive_bytes = archive.read()
        st.download_button(
            label="📩 Download vectorstore",
            data=archive_bytes,
            file_name=file_name,
            mime='application/zip'
        )

    if st.button('🛑🎥 Remove YT video from context'):
        if 'yt' in st.session_state:
            del st.session_state['db']
            del st.session_state['yt']
            del st.session_state['yt_text']
            del st.session_state['plugin']
            st.experimental_rerun()
# WEBSITE PLUGIN — scrape up to 10 URLs, embed the extracted text in a Chroma
# vectorstore and store a RetrievalQA chain in st.session_state['web_sites'].
if st.session_state['plugin'] == "🔗 Talk with Website" and 'web_sites' not in st.session_state:
    with st.expander("🔗 Talk with Website", expanded=True):
        web_url = st.text_area("🔗 Enter a website URLs , one for each line")
        if web_url is not None and st.button('🔗✅ Add website to context'):
            if web_url != "":
                text = []
                # max 10 websites
                with st.spinner('🔗 Extracting TEXT from Websites ...'):
                    for url in web_url.split("\n")[:10]:
                        # NOTE(review): no error handling here — one unreachable
                        # URL aborts the whole extraction.
                        page = requests.get(url)
                        soup = BeautifulSoup(page.content, 'html.parser')
                        text.append(soup.get_text())
                # creating a vectorstore
                with st.spinner('🔗 Creating Vectorstore...'):
                    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
                    texts = text_splitter.create_documents(text)
                    # Select embeddings
                    embeddings = st.session_state['hf']
                    # Create a vectorstore from documents
                    random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                    db = Chroma.from_documents(texts, embeddings, persist_directory="./chroma_db_" + random_str)

                with st.spinner('🔗 Saving Vectorstore...'):
                    # save vectorstore
                    db.persist()
                    # create .zip file of directory to download
                    shutil.make_archive("./chroma_db_" + random_str, 'zip', "./chroma_db_" + random_str)
                    # save in session state and download
                    st.session_state['db'] = "./chroma_db_" + random_str + ".zip"

                with st.spinner('🔗 Creating QA chain...'):
                    # Create retriever interface
                    retriever = db.as_retriever()
                    # Create QA chain
                    qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                    st.session_state['web_sites'] = qa
                    st.session_state['web_text'] = text
                    st.experimental_rerun()
# Website status panel: vectorstore download plus a button to clear the
# website context from the session.
if st.session_state['plugin'] == "🔗 Talk with Website":
    if 'db' in st.session_state:
        # Drop the leading "./" so the browser gets a clean file name.
        file_name = st.session_state['db'][2:]
        # BUG FIX: read via a context manager — the original
        # open(file_name, 'rb').read() leaked the file handle on every rerun.
        with open(file_name, 'rb') as archive:
            archive_bytes = archive.read()
        st.download_button(
            label="📩 Download vectorstore",
            data=archive_bytes,
            file_name=file_name,
            mime='application/zip'
        )

    if st.button('🛑🔗 Remove Website from context'):
        if 'web_sites' in st.session_state:
            del st.session_state['db']
            del st.session_state['web_sites']
            del st.session_state['web_text']
            del st.session_state['plugin']
            st.experimental_rerun()
# UPLOAD PREVIOUS VECTORSTORE — extract a previously downloaded .zip archive
# and rebuild a RetrievalQA chain from it (stored in st.session_state['old_db']).
if st.session_state['plugin'] == "💾 Upload saved VectorStore" and 'old_db' not in st.session_state:
    with st.expander("💾 Upload saved VectorStore", expanded=True):
        db_file = st.file_uploader("Upload a saved VectorStore", type=["zip"])
        if db_file is not None and st.button('✅💾 Add saved VectorStore to context'):
            if db_file != "":
                with st.spinner('💾 Extracting VectorStore...'):
                    # Extract the archive into a fresh random directory.
                    with ZipFile(db_file, 'r') as zipObj:
                        random_str = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
                        zipObj.extractall("chroma_db_" + random_str)
                    # save in session state the path of the directory
                    st.session_state['old_db'] = "chroma_db_" + random_str
                    hf = st.session_state['hf']
                    # BUG FIX: Chroma's first positional argument is the
                    # collection name, not the storage path — the original call
                    # created an empty in-memory collection instead of loading
                    # the extracted store. Pass persist_directory explicitly.
                    db = Chroma(persist_directory="chroma_db_" + random_str, embedding_function=hf)

                with st.spinner('💾 Creating QA chain...'):
                    retriever = db.as_retriever()
                    # Create QA chain
                    qa = RetrievalQA.from_chain_type(llm=st.session_state['LLM'], chain_type='stuff', retriever=retriever, return_source_documents=True)
                    st.session_state['old_db'] = qa
                    st.experimental_rerun()
# Button to discard a previously loaded VectorStore from the session.
if st.session_state['plugin'] == "💾 Upload saved VectorStore":
    if st.button('🛑💾 Remove VectorStore from context'):
        if 'old_db' in st.session_state:
            for key in ('old_db', 'plugin'):
                del st.session_state[key]
            st.experimental_rerun()
# END OF PLUGIN
add_vertical_space(4)
# Logout wipes the whole session (credentials, chatbot, plugin state).
if 'hf_email' in st.session_state:
    if st.button('🗑 Logout'):
        keys = list(st.session_state.keys())
        for key in keys:
            del st.session_state[key]
        st.experimental_rerun()

    # Chat-export widget (defined in exportchat.py).
    export_chat()
add_vertical_space(5)
# Donation banner + credits footer.
html_chat = '<center><h6>🤗 Support the project with a donation for the development of new features 🤗</h6>'
html_chat += '<br><a href="https://rebrand.ly/SupportAUTOGPTfree"><img src="https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif" alt="PayPal donate button" /></a><center><br>'
st.markdown(html_chat, unsafe_allow_html=True)
st.write('Made with ❤️ by [Alessandro CIciarelli](https://intelligenzaartificialeitalia.net)')
# User input
# Layout of input/response containers (referenced throughout the rest of the app).
input_container = st.container()      # chat input box
response_container = st.container()   # rendered conversation history
data_view_container = st.container()  # expanders showing the loaded context
loading_container = st.container()    # spinners while tools run


## Applying the user input box
with input_container:
    input_text = st.chat_input("🧑💻 Write here 👇", key="input")
with data_view_container:
    # Show whatever context artefact the active plugin has loaded, each inside
    # its own expander; keys are set by the corresponding sidebar plugin.
    if 'df' in st.session_state:
        with st.expander("🤖 View your **DATA**"):
            st.data_editor(st.session_state['df'], use_container_width=True)
    if 'pdf' in st.session_state:
        with st.expander("🤖 View your **DOCUMENTs**"):
            st.write(st.session_state['documents'])
    if 'audio' in st.session_state:
        with st.expander("🤖 View your **AUDIO**"):
            st.write(st.session_state['audio_text'])
    if 'yt' in st.session_state:
        with st.expander("🤖 View your **YT video**"):
            st.write(st.session_state['yt_text'])
    if 'web_text' in st.session_state:
        with st.expander("🤖 View the **Website content**"):
            st.write(st.session_state['web_text'])
    if 'old_db' in st.session_state:
        with st.expander("🗂 View your **saved VectorStore**"):
            st.success("📚 VectorStore loaded")
    if 'god_mode_source' in st.session_state:
        with st.expander("🌍 View source"):
            for s in st.session_state['god_mode_source']:
                st.markdown("- " + s)
# Response output
|
| 775 |
+
## Function for taking user prompt as input followed by producing AI generated responses
|
| 776 |
+
def _retrieval_answer(qa_chain, prompt, context, prompt_builder):
    """Run one RetrievalQA chain and build the reply material for it.

    All retrieval-backed plugins (documents, GOD MODE, websites, saved
    vectorstore, audio, YT) share this logic; only the QA chain and the
    prompt-template function differ.

    Returns a ``(final_prompt, make_better, source)`` triple:
    - answers longer than 110 words are returned verbatim with their source
      documents appended and ``make_better=False`` (skip the LLM rewrite pass);
    - shorter answers are wrapped by ``prompt_builder`` for an LLM rewrite, and
      the sources are returned separately so they can be appended afterwards.
    """
    make_better = True
    source = ""
    with st.spinner('🚀 Using tool to get information...'):
        result = qa_chain({"query": prompt})
        solution = result["result"]
        has_sources = 'source_documents' in result and len(result["source_documents"]) > 0
        if len(solution.split()) > 110:
            # Long enough to stand on its own — return it unmodified.
            make_better = False
            final_prompt = solution
            if has_sources:
                final_prompt += "\n\n✅Source:\n"
                for d in result["source_documents"]:
                    final_prompt += "- " + str(d) + "\n"
        else:
            # Short answer: wrap it in the plugin-specific prompt template so
            # the chatbot can elaborate; keep the sources for later appending.
            final_prompt = prompt_builder(prompt, context, solution)
            if has_sources:
                source += "\n\n✅Source:\n"
                for d in result["source_documents"]:
                    source += "- " + str(d) + "\n"
    return final_prompt, make_better, source


## Function for taking user prompt as input followed by producing AI generated responses
def generate_response(prompt):
    """Build the final prompt for the active plugin and return the reply.

    Dispatches on ``st.session_state['plugin']``: pandas/sketch tools for the
    DATA plugin, a RetrievalQA chain for every vectorstore-backed plugin, and a
    plain (optionally web-augmented) conversation prompt otherwise. When
    ``make_better`` stays True the prompt is sent through the HugChat chatbot;
    otherwise the retrieved answer is returned as-is.
    """
    final_prompt = ""
    make_better = True
    source = ""

    with loading_container:

        # FOR DEVELOPMENT PLUGIN
        # if st.session_state['plugin'] == "🔌 PLUGIN NAME" and 'PLUGIN DB' in st.session_state:
        #     with st.spinner('🚀 Using PLUGIN NAME...'):
        #         solution = st.session_state['PLUGIN DB']({"query": prompt})
        #         final_prompt = YourCustomPrompt(prompt, context)

        # Last exchange, used as conversational context by every branch.
        last_context = f"User: {st.session_state['past'][-1]}\nBot: {st.session_state['generated'][-1]}\n"

        if st.session_state['plugin'] == "📋 Talk with your DATA" and 'df' in st.session_state:
            if prompt.find('python') != -1 or prompt.find('Code') != -1 or prompt.find('code') != -1 or prompt.find('Python') != -1:
                # The user asked for code: use sketch's howto tool.
                with st.spinner('🚀 Using tool for python code...'):
                    solution = "\n```python\n"
                    solution += st.session_state['df'].sketch.howto(prompt, call_display=False)
                    solution += "\n```\n\n"
                    final_prompt = prompt4Code(prompt, last_context, solution)
            else:
                with st.spinner('🚀 Using tool to get information...'):
                    solution = st.session_state['df'].sketch.ask(prompt, call_display=False)
                    final_prompt = prompt4Data(prompt, last_context, solution)

        elif st.session_state['plugin'] == "📝 Talk with your DOCUMENTS" and 'pdf' in st.session_state:
            final_prompt, make_better, source = _retrieval_answer(
                st.session_state['pdf'], prompt, last_context, prompt4Context)

        elif st.session_state['plugin'] == "🧠 GOD MODE" and 'god_mode' in st.session_state:
            final_prompt, make_better, source = _retrieval_answer(
                st.session_state['god_mode'], prompt, last_context, prompt4Context)

        elif st.session_state['plugin'] == "🔗 Talk with Website" and 'web_sites' in st.session_state:
            final_prompt, make_better, source = _retrieval_answer(
                st.session_state['web_sites'], prompt, last_context, prompt4Context)

        elif st.session_state['plugin'] == "💾 Upload saved VectorStore" and 'old_db' in st.session_state:
            final_prompt, make_better, source = _retrieval_answer(
                st.session_state['old_db'], prompt, last_context, prompt4Context)

        elif st.session_state['plugin'] == "🎧 Talk with your AUDIO" and 'audio' in st.session_state:
            final_prompt, make_better, source = _retrieval_answer(
                st.session_state['audio'], prompt, last_context, prompt4Audio)

        elif st.session_state['plugin'] == "🎥 Talk with YT video" and 'yt' in st.session_state:
            final_prompt, make_better, source = _retrieval_answer(
                st.session_state['yt'], prompt, last_context, prompt4YT)

        else:
            # Plain conversation: use the last two exchanges when available.
            if len(st.session_state['past']) == 1:
                context = last_context
            else:
                context = (f"User: {st.session_state['past'][-2]}\nBot: {st.session_state['generated'][-2]}\n"
                           + last_context)

            if 'web_search' in st.session_state and st.session_state['web_search'] == "True":
                # Augment the prompt with DuckDuckGo search results.
                with st.spinner('🚀 Using internet to get information...'):
                    internet_result = ""
                    internet_answer = ""
                    with DDGS() as ddgs:
                        ddgs_gen = ddgs.text(prompt, region=st.session_state['region'], safesearch=st.session_state['safesearch'], timelimit=st.session_state['timelimit'])
                        for r in islice(ddgs_gen, st.session_state['max_results']):
                            internet_result += str(r) + "\n\n"
                        fast_answer = ddgs.answers(prompt)
                        for r in islice(fast_answer, 2):
                            internet_answer += str(r) + "\n\n"

                    final_prompt = prompt4conversationInternet(prompt, context, internet_result, internet_answer)
            else:
                final_prompt = prompt4conversation(prompt, context)

    if make_better:
        with st.spinner('🚀 Generating response...'):
            print(final_prompt)
            response = st.session_state['chatbot'].chat(final_prompt, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty, top_k=top_k, max_new_tokens=max_new_tokens)
            response += source
    else:
        # Retrieval answer was long enough — return it without a rewrite pass.
        print(final_prompt)
        response = final_prompt

    return response
| 968 |
+
## Conditional display of AI generated responses as a function of user provided prompts
with response_container:
    # Only produce a reply when the user actually submitted text AND the
    # HuggingFace credentials were stored in the session at login time.
    if input_text and 'hf_email' in st.session_state and 'hf_pass' in st.session_state:
        response = generate_response(input_text)
        # Persist the turn in the session history so the conversation is
        # re-rendered in full on the next Streamlit rerun.
        st.session_state.past.append(input_text)
        st.session_state.generated.append(response)
|
| 974 |
+
|
| 975 |
+
|
| 976 |
+
# Re-render the whole conversation in order: first the user message, then
# the assistant reply (with an optional expandable "Source" section).
if 'generated' in st.session_state and st.session_state['generated']:
    for i, answer in enumerate(st.session_state['generated']):
        with st.chat_message(name="user"):
            st.markdown(st.session_state['past'][i])

        with st.chat_message(name="assistant"):
            # Replies produced by a document/search plugin embed their
            # citations after a "✅Source:" marker. Split once and reuse
            # the parts (the original recomputed this split three times).
            parts = answer.split("✅Source:")
            if len(parts) > 1:
                st.markdown(parts[0])
                with st.expander("📚 Source of message number " + str(i+1)):
                    st.markdown(parts[1])
            else:
                st.markdown(answer)

        # NOTE(review): renders an empty markdown element — presumably a
        # spacer between messages; its loop placement is inferred from the
        # mangled source, confirm against the original layout.
        st.markdown('', unsafe_allow_html=True)
|
| 996 |
+
|
| 997 |
+
|
| 998 |
+
else:
    # No HuggingFace credentials in the session yet: show login guidance
    # instead of the chat UI (pairs with the credential check made earlier
    # in the script, outside this excerpt).
    st.info("👋 Hey , we are very happy to see you here 🤗")
    st.info("👉 Please Login to continue, click on top left corner to login 🚀")
    st.error("👉 If you are not registered on Hugging Face, please register first and then login 🤗")
|