coztomate committed on
Commit
4a5fd46
·
1 Parent(s): fceb790

added config

Browse files
Files changed (2) hide show
  1. app.py +68 -81
  2. config_llm.py +15 -0
app.py CHANGED
@@ -5,29 +5,54 @@ import io
5
  from openai import OpenAI
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
  from text_generation import Client
 
 
8
 
9
-
10
- hf_key_mistral = st.secrets["hf_key"]
11
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- from huggingface_hub import InferenceClient
14
- client = InferenceClient(token=hf_key_mistral)
15
 
16
- # load the simplifier model
17
- # Load the tokenizer and model (do this outside the function for efficiency)
18
  tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-small-finetuned-text-simplification")
19
  model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-small-finetuned-text-simplification")
20
 
 
21
  def simplify_text(input_text):
22
  # Tokenize and encode the input text
23
  input_ids = tokenizer.encode("simplify: " + input_text, return_tensors="pt")
24
-
25
  # Generate the simplified text
26
  output = model.generate(input_ids, min_length=5, max_length=80, do_sample=True)
27
-
28
  # Decode the simplified text
29
  simplified_text = tokenizer.decode(output[0], skip_special_tokens=True)
30
-
31
  # Post-process to ensure the output ends with a complete sentence
32
  # Find the last period, question mark, or exclamation point
33
  last_valid_ending = max(simplified_text.rfind('.'), simplified_text.rfind('?'), simplified_text.rfind('!'))
@@ -37,10 +62,10 @@ def simplify_text(input_text):
37
  else:
38
  # No sentence ending found; return the whole text or handle as appropriate
39
  cleaned_text = simplified_text
40
-
41
  return cleaned_text
42
 
43
- # Define the path to your example text
 
44
  example_text_path = "example_text.txt"
45
 
46
  # Function to load example text from a file
@@ -60,45 +85,19 @@ def load_image(image_path):
60
  img.load()
61
  return img
62
 
63
- # Initialize session state variables
64
- if 'user_input' not in st.session_state:
65
- st.session_state['user_input'] = ""
66
- if 'simplified_text' not in st.session_state:
67
- st.session_state['simplified_text'] = ''
68
- if 'new_caption' not in st.session_state:
69
- st.session_state['new_caption'] = None
70
- if 'model_clip' not in st.session_state:
71
- st.session_state['model_clip'] = None
72
- if 'transform_clip' not in st.session_state:
73
- st.session_state['transform_clip'] = None
74
- if 'openai_api_key' not in st.session_state:
75
- st.session_state['openai_api_key'] = ''
76
- if 'message_content_from_caption' not in st.session_state:
77
- st.session_state['message_content_from_caption'] = ''
78
- if 'message_content_from_simplified_text' not in st.session_state:
79
- st.session_state['message_content_from_simplified_text'] = ''
80
- if 'mistral_from_caption' not in st.session_state:
81
- st.session_state['mistral_from_caption'] = ''
82
- if 'mistral_from_simplified' not in st.session_state:
83
- st.session_state['mistral_from_simplified'] = ''
84
- if 'image_from_caption' not in st.session_state:
85
- st.session_state['image_from_caption'] = None
86
- if 'image_from_simplified_text' not in st.session_state:
87
- st.session_state['image_from_simplified_text'] = None
88
- if 'image_from_press_text' not in st.session_state:
89
- st.session_state['image_from_press_text'] = None
90
- if 'image_from_press_text_from_caption' not in st.session_state:
91
- st.session_state['image_from_press_text_from_caption'] = None
92
 
93
 
94
- ################################################
95
 
96
  # Create a Streamlit app
97
  st.title("ARTSPEAK > s i m p l i f i e r")
98
 
99
  st.markdown("---")
100
 
101
- # Create a sub-section
102
  with st.expander("Upload Files"):
103
  st.markdown("## Upload Text and Image")
104
  ##### Upload of files
@@ -124,10 +123,11 @@ with st.expander("Upload Files"):
124
 
125
  st.markdown("---")
126
 
127
- #### Simplifier
128
  with st.expander("Simplify Text and Image"):
129
  st.markdown("## 'Simplify' Text and Image")
130
 
 
131
  if st.button("Simplify the Input Text"):
132
  if user_input:
133
  simplified_text = simplify_text(user_input)
@@ -139,8 +139,7 @@ with st.expander("Simplify Text and Image"):
139
  if st.session_state['simplified_text']:
140
  st.write(st.session_state['simplified_text'])
141
 
142
-
143
- ####Get new caption
144
  # Button to get new caption
145
  if st.button("Get New Caption for Image"):
146
  # Initialize image data variable
@@ -164,7 +163,7 @@ with st.expander("Simplify Text and Image"):
164
  caption = client.image_to_text(image_data)
165
  # Update the session state
166
  st.session_state['new_caption'] = caption
167
- st.write(f"New Caption for your Image: {caption}")
168
 
169
  except Exception as e:
170
  st.error(f"An error occurred: {e}")
@@ -182,41 +181,29 @@ with st.expander("Press Text Generation"):
182
  # Define radio button options
183
  option = st.radio(
184
  "Choose a Language Model:",
185
- ('Mistral 8x7B (free)', 'GPT-3.5 Turbo (needs API Key)'))
186
 
187
  # Conditional logic based on radio button choice
188
- if option == 'Mistral 8x7B (free)':
189
  st.header("Mistral 8x7B")
190
 
191
  ############
192
  ###Mistral##
193
  ############
194
 
195
-
196
- #defaults for Mistral
197
- DEFAULT_SYSTEM_PROMPT = "You will be given a very short description of a contemporary artwork. Please create a complex exhibition press text based on the given artwork description using international art english dealing with post-colonialism, military industrial complex, anthropocene, identity politics and queerness through the language of Rancière, Fontane, Paglen, Deleuze, Steyerl, Spivak, Preciado, Žižek, Foucault and Harraway. Avoid excessive namedropping. Just output press text without explaining your actions."
198
- MAX_MAX_NEW_TOKENS = 4096
199
- DEFAULT_MAX_NEW_TOKENS = 1000
200
- EOS_STRING = "</s>"
201
- EOT_STRING = "<EOT>"
202
-
203
- model_id_mistral = "mistralai/Mixtral-8x7B-Instruct-v0.1"
204
-
205
- API_URL = "https://api-inference.huggingface.co/models/" + model_id_mistral
206
- headers = {"Authorization": f"Bearer {hf_key_mistral}"}
207
 
208
  client_mistral = Client(
209
- API_URL,
210
  headers=headers,
211
  )
212
 
213
  def run_single_input(
214
  message: str,
215
- system_prompt: str,
216
- max_new_tokens: int = 2048,
217
- temperature: float = 0.3,
218
- top_p: float = 0.9,
219
- top_k: int = 50,
220
  ) -> str:
221
  """
222
  Run the model for a single input and return a single output.
@@ -227,13 +214,12 @@ with st.expander("Press Text Generation"):
227
  max_new_tokens=max_new_tokens,
228
  do_sample=True,
229
  top_p=top_p,
230
- top_k=top_k,
231
  temperature=temperature,
232
  )
233
  stream = client_mistral.generate_stream(prompt, **generate_kwargs)
234
  output = ""
235
  for response in stream:
236
- if any([end_token in response.token.text for end_token in [EOS_STRING, EOT_STRING]]):
237
  break # Stop at the first end token
238
  else:
239
  output += response.token.text
@@ -245,7 +231,7 @@ with st.expander("Press Text Generation"):
245
  if st.button("Generate Press Text from New Image Caption with Mistral"):
246
  if st.session_state['new_caption']:
247
  try:
248
- st.session_state['mistral_from_caption'] = run_single_input(st.session_state['new_caption'], DEFAULT_SYSTEM_PROMPT)
249
  except Exception as e:
250
  st.error(f"An error occurred: {e}")
251
  else:
@@ -260,7 +246,7 @@ with st.expander("Press Text Generation"):
260
  if st.button("Generate Press Text from Simplified Text with Mistral"):
261
  if st.session_state['simplified_text']:
262
  try:
263
- st.session_state['mistral_from_simplified'] = run_single_input(st.session_state['simplified_text'], DEFAULT_SYSTEM_PROMPT)
264
  except Exception as e:
265
  st.error(f"An error occurred: {e}")
266
  else:
@@ -271,14 +257,12 @@ with st.expander("Press Text Generation"):
271
  st.write("Generated Press Text from Simplified Text:")
272
  st.write(st.session_state['mistral_from_simplified'])
273
 
274
-
275
-
276
- elif option == 'GPT-3.5 Turbo (needs API Key)':
277
  st.header("GPT-3.5")
278
 
279
- #######
280
- #OpenAI API
281
- #######
282
  # Add a text input for the OpenAI API key
283
  api_key_input = st.text_input("Enter your OpenAI API key to continue", type="password")
284
 
@@ -293,8 +277,11 @@ with st.expander("Press Text Generation"):
293
  client = OpenAI(api_key=api_key,)
294
  completion = client.chat.completions.create(
295
  model="gpt-3.5-turbo",
 
 
 
296
  messages=[
297
- {"role": "system", "content": "I give a short description of an artwork. Please create a complex exhibition press text based on the given artwork description using international art english dealing with post-colonialism, military industrial complex, anthropocene, identity politics and queerness through the language of Rancière, Fontane, Paglen, Deleuze, Steyerl, Spivak, Preciado, Žižek, Foucault and Harraway. Avoid excessive namedropping. Just output press text without explaining your actions."},
298
  {"role": "user", "content": prompt_message}
299
  ]
300
  )
@@ -333,9 +320,9 @@ with st.expander("Press Text Generation"):
333
 
334
  st.markdown("---")
335
 
336
- ############
337
- ##Diffusor##
338
- ############
339
 
340
  with st.expander("Image Generation"):
341
  st.markdown("## Generate new Images from Texts")
 
5
  from openai import OpenAI
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
  from text_generation import Client
8
+ from huggingface_hub import InferenceClient
9
+ import config_llm
10
 
11
+ # Initialize session state variables
12
+ if 'user_input' not in st.session_state:
13
+ st.session_state['user_input'] = ""
14
+ if 'simplified_text' not in st.session_state:
15
+ st.session_state['simplified_text'] = ''
16
+ if 'new_caption' not in st.session_state:
17
+ st.session_state['new_caption'] = None
18
+ if 'model_clip' not in st.session_state:
19
+ st.session_state['model_clip'] = None
20
+ if 'transform_clip' not in st.session_state:
21
+ st.session_state['transform_clip'] = None
22
+ if 'openai_api_key' not in st.session_state:
23
+ st.session_state['openai_api_key'] = ''
24
+ if 'huggingface_key' not in st.session_state:
25
+ st.session_state['huggingface_key'] = ''
26
+ if 'message_content_from_caption' not in st.session_state:
27
+ st.session_state['message_content_from_caption'] = ''
28
+ if 'message_content_from_simplified_text' not in st.session_state:
29
+ st.session_state['message_content_from_simplified_text'] = ''
30
+ if 'mistral_from_caption' not in st.session_state:
31
+ st.session_state['mistral_from_caption'] = ''
32
+ if 'mistral_from_simplified' not in st.session_state:
33
+ st.session_state['mistral_from_simplified'] = ''
34
+ if 'image_from_caption' not in st.session_state:
35
+ st.session_state['image_from_caption'] = None
36
+ if 'image_from_simplified_text' not in st.session_state:
37
+ st.session_state['image_from_simplified_text'] = None
38
+ if 'image_from_press_text' not in st.session_state:
39
+ st.session_state['image_from_press_text'] = None
40
+ if 'image_from_press_text_from_caption' not in st.session_state:
41
+ st.session_state['image_from_press_text_from_caption'] = None
42
 
 
 
43
 
44
+ # Load the tokenizer and simplifier model
 
45
  tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-small-finetuned-text-simplification")
46
  model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-small-finetuned-text-simplification")
47
 
48
+ # Function to simplify text
49
  def simplify_text(input_text):
50
  # Tokenize and encode the input text
51
  input_ids = tokenizer.encode("simplify: " + input_text, return_tensors="pt")
 
52
  # Generate the simplified text
53
  output = model.generate(input_ids, min_length=5, max_length=80, do_sample=True)
 
54
  # Decode the simplified text
55
  simplified_text = tokenizer.decode(output[0], skip_special_tokens=True)
 
56
  # Post-process to ensure the output ends with a complete sentence
57
  # Find the last period, question mark, or exclamation point
58
  last_valid_ending = max(simplified_text.rfind('.'), simplified_text.rfind('?'), simplified_text.rfind('!'))
 
62
  else:
63
  # No sentence ending found; return the whole text or handle as appropriate
64
  cleaned_text = simplified_text
 
65
  return cleaned_text
66
 
67
+
68
+ # Define the path to example text
69
  example_text_path = "example_text.txt"
70
 
71
  # Function to load example text from a file
 
85
  img.load()
86
  return img
87
 
88
+ #get huggingface key
89
+ st.session_state['huggingface_key'] = st.secrets["hf_key"]
90
+ client = InferenceClient(token=st.session_state['huggingface_key'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
 
93
+ ########################################################################
94
 
95
  # Create a Streamlit app
96
  st.title("ARTSPEAK > s i m p l i f i e r")
97
 
98
  st.markdown("---")
99
 
100
+ # Create a sub-section for uploading the files
101
  with st.expander("Upload Files"):
102
  st.markdown("## Upload Text and Image")
103
  ##### Upload of files
 
123
 
124
  st.markdown("---")
125
 
126
+ #### Simplifier and Image Caption
127
  with st.expander("Simplify Text and Image"):
128
  st.markdown("## 'Simplify' Text and Image")
129
 
130
+ ## Text simplifier
131
  if st.button("Simplify the Input Text"):
132
  if user_input:
133
  simplified_text = simplify_text(user_input)
 
139
  if st.session_state['simplified_text']:
140
  st.write(st.session_state['simplified_text'])
141
 
142
+ ## Get new caption
 
143
  # Button to get new caption
144
  if st.button("Get New Caption for Image"):
145
  # Initialize image data variable
 
163
  caption = client.image_to_text(image_data)
164
  # Update the session state
165
  st.session_state['new_caption'] = caption
166
+ st.write(st.session_state['new_caption'])
167
 
168
  except Exception as e:
169
  st.error(f"An error occurred: {e}")
 
181
  # Define radio button options
182
  option = st.radio(
183
  "Choose a Language Model:",
184
+ ('Mistral 8x7B', 'GPT-3.5 Turbo'))
185
 
186
  # Conditional logic based on radio button choice
187
+ if option == 'Mistral 8x7B':
188
  st.header("Mistral 8x7B")
189
 
190
  ############
191
  ###Mistral##
192
  ############
193
 
194
+ headers = {"Authorization": f"Bearer {st.session_state['huggingface_key']}"}
 
 
 
 
 
 
 
 
 
 
 
195
 
196
  client_mistral = Client(
197
+ config_llm.API_URL,
198
  headers=headers,
199
  )
200
 
201
  def run_single_input(
202
  message: str,
203
+ system_prompt: str = config_llm.DEFAULT_SYSTEM_PROMPT,
204
+ max_new_tokens: int = config_llm.MAX_NEW_TOKENS,
205
+ temperature: float = config_llm.TEMPERATURE,
206
+ top_p: float = config_llm.TOP_P
 
207
  ) -> str:
208
  """
209
  Run the model for a single input and return a single output.
 
214
  max_new_tokens=max_new_tokens,
215
  do_sample=True,
216
  top_p=top_p,
 
217
  temperature=temperature,
218
  )
219
  stream = client_mistral.generate_stream(prompt, **generate_kwargs)
220
  output = ""
221
  for response in stream:
222
+ if any([end_token in response.token.text for end_token in [config_llm.EOS_STRING, config_llm.EOT_STRING]]):
223
  break # Stop at the first end token
224
  else:
225
  output += response.token.text
 
231
  if st.button("Generate Press Text from New Image Caption with Mistral"):
232
  if st.session_state['new_caption']:
233
  try:
234
+ st.session_state['mistral_from_caption'] = run_single_input(st.session_state['new_caption'], config_llm.DEFAULT_SYSTEM_PROMPT)
235
  except Exception as e:
236
  st.error(f"An error occurred: {e}")
237
  else:
 
246
  if st.button("Generate Press Text from Simplified Text with Mistral"):
247
  if st.session_state['simplified_text']:
248
  try:
249
+ st.session_state['mistral_from_simplified'] = run_single_input(st.session_state['simplified_text'], config_llm.DEFAULT_SYSTEM_PROMPT)
250
  except Exception as e:
251
  st.error(f"An error occurred: {e}")
252
  else:
 
257
  st.write("Generated Press Text from Simplified Text:")
258
  st.write(st.session_state['mistral_from_simplified'])
259
 
260
+ elif option == 'GPT-3.5 Turbo':
 
 
261
  st.header("GPT-3.5")
262
 
263
+ ##########
264
+ ##OpenAI##
265
+ #########
266
  # Add a text input for the OpenAI API key
267
  api_key_input = st.text_input("Enter your OpenAI API key to continue", type="password")
268
 
 
277
  client = OpenAI(api_key=api_key,)
278
  completion = client.chat.completions.create(
279
  model="gpt-3.5-turbo",
280
+ max_tokens=config_llm.MAX_NEW_TOKENS,
281
+ temperature = config_llm.TEMPERATURE,
282
+ top_p = config_llm.TOP_P,
283
  messages=[
284
+ {"role": "system", "content": config_llm.DEFAULT_SYSTEM_PROMPT},
285
  {"role": "user", "content": prompt_message}
286
  ]
287
  )
 
320
 
321
  st.markdown("---")
322
 
323
+ ########################################################################
324
+
325
+ ## Image Generation Interface
326
 
327
  with st.expander("Image Generation"):
328
  st.markdown("## Generate new Images from Texts")
config_llm.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration and Constants
2
+
3
+ # Defaults for Mistral/OpenAI
4
+ DEFAULT_SYSTEM_PROMPT = "You will be given a very short description of a contemporary artwork. Please create a complex exhibition press text based on the given artwork description using international art english dealing with post-colonialism, military industrial complex, anthropocene, identity politics and queerness through the language of Rancière, Fontane, Paglen, Deleuze, Steyerl, Spivak, Preciado, Žižek, Foucault and Harraway. Avoid excessive namedropping. Just output press text without explaining your actions."
5
+ EOS_STRING = "</s>"
6
+ EOT_STRING = "<EOT>"
7
+
8
+ # Mistral Model Configuration
9
+ model_id_mistral = "mistralai/Mixtral-8x7B-Instruct-v0.1"
10
+ API_URL = f"https://api-inference.huggingface.co/models/{model_id_mistral}"
11
+
12
+ #model parameters
13
+ MAX_NEW_TOKENS = 2048
14
+ TEMPERATURE = 0.7
15
+ TOP_P = 0.8