FM-1976 committed
Commit 33ae49b · verified · 1 Parent(s): 057c7f5

Create app.py

Files changed (1)
  app.py +209 -0
app.py ADDED
@@ -0,0 +1,209 @@
import streamlit as st
from llama_cpp import Llama
import warnings
warnings.filterwarnings(action='ignore')
import os  # needed for os.environ.get below (missing in the original commit)
import datetime
import random
import string
from time import sleep
import tiktoken
# required for HF Spaces
from huggingface_hub import hf_hub_download

# for counting the tokens in the prompt and in the result
# context_count = len(encoding.encode(yourtext))
encoding = tiktoken.get_encoding("r50k_base")
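# NOTE: r50k_base is an OpenAI (GPT-3 era) encoding, not Gemma's tokenizer,
# so the token counts (and the t/s speed derived from them below) are approximate.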

verbosity = False
nCTX = 8192
sTOPS = ['<eos>']
modelname = "Gemma2-2B-it"
# Set the webpage title
st.set_page_config(
    page_title=f"Your LocalGPT ✨ with {modelname}",
    page_icon="🌟",
    layout="wide")

if "modelfile" not in st.session_state:
    st.session_state.modelfile = hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "bartowski/gemma-2-2b-it-GGUF"),
        filename=os.environ.get("MODEL_FILE", "gemma-2-2b-it-Q5_K_M.gguf"),
    )
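# REPO_ID and MODEL_FILE can be overridden through environment variables
# (e.g. in the Space settings); hf_hub_download caches the file in the local
# HF cache and returns its path, so repeated reruns do not re-download it.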

if "hf_model" not in st.session_state:
    st.session_state.hf_model = "Gemma2-2B-it"
# Initialize chat history for the LLM
if "messages" not in st.session_state:
    st.session_state.messages = []

# Initialize the chatMessages for visualization only
if "chatMessages" not in st.session_state:
    st.session_state.chatMessages = []

if "repeat" not in st.session_state:
    st.session_state.repeat = 1.35

if "temperature" not in st.session_state:
    st.session_state.temperature = 0.1

if "maxlength" not in st.session_state:
    st.session_state.maxlength = 500

if "speed" not in st.session_state:
    st.session_state.speed = 0.0

if "numOfTurns" not in st.session_state:
    st.session_state.numOfTurns = 0

if "maxTurns" not in st.session_state:
    st.session_state.maxTurns = 5  # must be an odd number, greater than or equal to 5

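# (st.session_state survives Streamlit's top-to-bottom script reruns, so the
# defaults above are written only once per browser session.)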

def writehistory(filename, text):
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')

def genRANstring(n):
    """
    n = number of characters to randomize
    """
    res = ''.join(random.choices(string.ascii_uppercase + string.digits, k=n))
    return res
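# e.g. genRANstring(5) -> 'K2Q9Z' (illustrative output; used below to give
# each session a uniquely named log file)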
76
+
77
+ @st.cache_resource
78
+ def create_chat():
79
+ # Set HF API token and HF repo
80
+ from llama_cpp import Llama
81
+ client = Llama(
82
+ model_path=st.session_state.modelfile,
83
+ #n_gpu_layers=-1, #enable GPU
84
+ temperature=0.24,
85
+ n_ctx=nCTX,
86
+ max_tokens=600,
87
+ repeat_penalty=1.176,
88
+ stop=sTOPS,
89
+ verbose=verbosity,
90
+ )
91
+ print('loading gemma-2-2b-it-Q5_K_M.gguf with LlamaCPP...')
92
+ return client
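# st.cache_resource keeps a single Llama instance alive across Streamlit
# reruns and sessions, so the GGUF model is loaded into memory only once
# per process.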


# Create the session states
if "logfilename" not in st.session_state:
    ## Logger file
    logfile = f'{genRANstring(5)}_log.txt'
    st.session_state.logfilename = logfile
    # Write the first two entries into the history log
    writehistory(st.session_state.logfilename, f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀 {modelname}\n---\n🧠🫡: You are a helpful assistant.')
    writehistory(st.session_state.logfilename, '🌀: How may I help you today?')


# AVATARS
av_us = 'https://github.com/fabiomatricardi/Gemma2-2b-it-chatbot/raw/main/images/user.png'    # or './man.png', or a single emoji, e.g. "🧑‍💻", "🤖", "🦖". Shortcodes are not supported.
av_ass = 'https://github.com/fabiomatricardi/Gemma2-2b-it-chatbot/raw/main/images/assistant2.png'  # or './robot.png'

### START STREAMLIT UI
# Create a header element
st.image('https://github.com/fabiomatricardi/Gemma2-2b-it-chatbot/raw/main/images/Gemma-2-Banner.original.jpg', use_column_width=True)
mytitle = f'> *🌟 {modelname} with {nCTX} tokens Context window* - Turn based Chat available with max capacity of :orange[**{st.session_state.maxTurns} messages**].'
st.markdown(mytitle, unsafe_allow_html=True)
#st.markdown('> Local Chat ')
#st.markdown('---')

# CREATE THE SIDEBAR
with st.sidebar:
    st.image('https://github.com/fabiomatricardi/Gemma2-2b-it-chatbot/raw/main/images/banner.png', use_column_width=True)
    st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.65, step=0.01)
    st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=2000, value=550, step=50)
    st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.176, step=0.02)
    st.session_state.turns = st.toggle('Turn based', value=False, help='Activate Conversational Turn Chat with History',
                                       disabled=False, label_visibility="visible")
    st.markdown(f"*Number of Max Turns*: {st.session_state.maxTurns}")
    actualTurns = st.markdown(f"*Chat History Length*: :green[Good]")
    statspeed = st.markdown(f'💫 speed: {st.session_state.speed} t/s')
    btnClear = st.button("Clear History", type="primary", use_container_width=True)
    st.markdown(f"**Logfile**: {st.session_state.logfilename}")
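
# NOTE: the "Clear History" button above has no handler in this commit; a
# minimal sketch (an assumption, not the author's code) to reset the chat:
if btnClear:
    st.session_state.messages = []
    st.session_state.chatMessages = []
    st.session_state.numOfTurns = 0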

llm = create_chat()

# Display chat messages from history on app rerun
for message in st.session_state.chatMessages:
    if message["role"] == "user":
        with st.chat_message(message["role"], avatar=av_us):
            st.markdown(message["content"])
    else:
        with st.chat_message(message["role"], avatar=av_ass):
            st.markdown(message["content"])

# Accept user input
if myprompt := st.chat_input("What is an AI model?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": myprompt})
    st.session_state.chatMessages.append({"role": "user", "content": myprompt})
    st.session_state.numOfTurns = len(st.session_state.messages)
    # Display user message in chat message container
    with st.chat_message("user", avatar=av_us):
        st.markdown(myprompt)
        usertext = f"user: {myprompt}"
        writehistory(st.session_state.logfilename, usertext)
    # Display assistant response in chat message container
    with st.chat_message("assistant", avatar=av_ass):
        message_placeholder = st.empty()
        with st.spinner("Thinking..."):
            start = datetime.datetime.now()
            conv_messages = []
            if st.session_state.turns:
                if st.session_state.numOfTurns > st.session_state.maxTurns:
                    # Keep only the last maxTurns messages as context
                    conv_messages = st.session_state.messages[-st.session_state.maxTurns:]
                    actualTurns.markdown(f"*Chat History Length*: :red[Trimmed]")
                else:
                    conv_messages = st.session_state.messages
            else:
                # Turn-based chat disabled: send only the latest user message
                conv_messages.append(st.session_state.messages[-1])
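            # With an odd maxTurns, the trimmed slice always starts on a user
            # message, since the roles strictly alternate (hence the "must be
            # an odd number" note on maxTurns above).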
            full_response = ""
            for chunk in llm.create_chat_completion(
                    messages=conv_messages,
                    temperature=st.session_state.temperature,
                    repeat_penalty=st.session_state.repeat,
                    stop=sTOPS,
                    max_tokens=st.session_state.maxlength,
                    stream=True,):
                try:
                    if chunk["choices"][0]["delta"]["content"]:
                        full_response += chunk["choices"][0]["delta"]["content"]
                        message_placeholder.markdown(full_response + "🟡")
                        delta = datetime.datetime.now() - start
                        totalseconds = delta.total_seconds()
                        prompttokens = len(encoding.encode(myprompt))
                        assistanttokens = len(encoding.encode(full_response))
                        totaltokens = prompttokens + assistanttokens
                        st.session_state.speed = totaltokens / totalseconds
                        statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s')
                except KeyError:
                    # the first/last streamed chunks carry no "content" in their delta
                    pass

            delta = datetime.datetime.now() - start
            totalseconds = delta.total_seconds()
            prompttokens = len(encoding.encode(myprompt))
            assistanttokens = len(encoding.encode(full_response))
            totaltokens = prompttokens + assistanttokens
            st.session_state.speed = totaltokens / totalseconds
            statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s')
            toregister = full_response + f"""
```
🧾 prompt tokens: {prompttokens}
📈 generated tokens: {assistanttokens}
⏳ generation time: {delta}
💫 speed: {st.session_state.speed:.3f} t/s
```"""
            message_placeholder.markdown(toregister)
            asstext = f"assistant: {toregister}"
            writehistory(st.session_state.logfilename, asstext)
    st.session_state.messages.append({"role": "assistant", "content": full_response})
    st.session_state.chatMessages.append({"role": "assistant", "content": toregister})
    st.session_state.numOfTurns = len(st.session_state.messages)
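
# To run this app locally (assumption, not part of the commit):
#   pip install streamlit llama-cpp-python tiktoken huggingface_hub
#   streamlit run app.py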