kambris committed on
Commit
63c342c
·
verified ·
1 Parent(s): 3d15a21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -7
app.py CHANGED
@@ -10,6 +10,26 @@ import networkx as nx
10
 
11
  st.set_page_config(layout="wide")
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def parse_voynich_word(word):
14
  """Parse a Voynich word into individual characters - NO assumptions about digraphs"""
15
  if not word or word.strip() == '':
@@ -143,12 +163,39 @@ def get_download_link_csv(df, filename):
143
 
144
  st.title("Voynich Manuscript Analyzer")
145
  st.write("Upload your CSV file to discover potential patterns and character distributions.")
146
- st.write("**Bottom-up analysis**: Each character is treated independently - no assumptions about digraphs")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
149
 
150
  if uploaded_file is not None:
151
- df = pd.read_csv(uploaded_file, header=None)
 
 
 
 
 
 
 
 
 
 
 
 
152
  words, chars_list, char_positions, char_connections, word_positions, line_word_map = analyze_csv(df)
153
 
154
  st.subheader("Basic Statistics")
@@ -177,7 +224,10 @@ if uploaded_file is not None:
177
  char_bigrams[bigram] += 1
178
 
179
  char_bigram_df = pd.DataFrame([
180
- {'Bigram': ''.join(bigram), 'Char1': bigram[0], 'Char2': bigram[1], 'Count': count}
 
 
 
181
  for bigram, count in char_bigrams.most_common(30)
182
  ])
183
  st.dataframe(char_bigram_df)
@@ -193,7 +243,7 @@ if uploaded_file is not None:
193
  char_trigrams[trigram] += 1
194
 
195
  char_trigram_df = pd.DataFrame([
196
- {'Trigram': ''.join(trigram), 'Count': count}
197
  for trigram, count in char_trigrams.most_common(30)
198
  ])
199
  st.dataframe(char_trigram_df)
@@ -206,7 +256,7 @@ if uploaded_file is not None:
206
  word_bigrams[bigram] += 1
207
 
208
  word_bigram_df = pd.DataFrame([
209
- {'Word1': bigram[0], 'Word2': bigram[1], 'Count': count}
210
  for bigram, count in word_bigrams.most_common(20)
211
  ])
212
  st.dataframe(word_bigram_df)
@@ -219,7 +269,10 @@ if uploaded_file is not None:
219
  word_trigrams[trigram] += 1
220
 
221
  word_trigram_df = pd.DataFrame([
222
- {'Word1': trigram[0], 'Word2': trigram[1], 'Word3': trigram[2], 'Count': count}
 
 
 
223
  for trigram, count in word_trigrams.most_common(20)
224
  ])
225
  st.dataframe(word_trigram_df)
@@ -370,9 +423,11 @@ if uploaded_file is not None:
370
  st.subheader("Overall Character Frequency")
371
  all_chars_flat = [char for chars in chars_list for char in chars]
372
  char_freq = Counter(all_chars_flat)
 
373
 
374
  fig_freq = plt.figure(figsize=(12, 6))
375
  char_freq_df = pd.DataFrame(char_freq.most_common(), columns=['Character', 'Count'])
 
376
  plt.bar(char_freq_df['Character'], char_freq_df['Count'])
377
  plt.title("Character Frequency Distribution")
378
  plt.xlabel("Character")
@@ -380,6 +435,7 @@ if uploaded_file is not None:
380
  plt.xticks(rotation=45)
381
  st.pyplot(fig_freq)
382
  st.dataframe(char_freq_df)
 
383
 
384
  # Character Position Heatmap
385
  st.subheader("Character Position Heatmap")
@@ -477,7 +533,9 @@ if uploaded_file is not None:
477
  ngrams[ngram] += 1
478
 
479
  ngram_df = pd.DataFrame([
480
- {'Pattern': ''.join(ngram), 'Count': count, 'Percentage': f"{count/len(chars_list)*100:.2f}%"}
 
 
481
  for ngram, count in ngrams.most_common(30)
482
  ])
483
  st.dataframe(ngram_df)
 
10
 
11
  st.set_page_config(layout="wide")
12
 
13
+ # Add custom CSS for floating image
14
+ st.markdown("""
15
+ <style>
16
+ .floating-image {
17
+ position: fixed;
18
+ bottom: 20px;
19
+ right: 20px;
20
+ z-index: 9999;
21
+ width: 150px;
22
+ height: auto;
23
+ border-radius: 10px;
24
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3);
25
+ transition: transform 0.3s ease;
26
+ }
27
+ .floating-image:hover {
28
+ transform: scale(1.1);
29
+ }
30
+ </style>
31
+ """, unsafe_allow_html=True)
32
+
33
  def parse_voynich_word(word):
34
  """Parse a Voynich word into individual characters - NO assumptions about digraphs"""
35
  if not word or word.strip() == '':
 
163
 
164
  st.title("Voynich Manuscript Analyzer")
165
  st.write("Upload your CSV file to discover potential patterns and character distributions.")
166
+
167
+ # Add floating image uploader
168
+ floating_image_file = st.file_uploader("Upload a floating image (optional)",
169
+ type=['png', 'jpg', 'jpeg', 'gif'],
170
+ key="floating_image")
171
+
172
+ if floating_image_file is not None:
173
+ # Convert image to base64 for embedding
174
+ import base64
175
+ image_bytes = floating_image_file.read()
176
+ image_b64 = base64.b64encode(image_bytes).decode()
177
+
178
+ # Display floating image
179
+ st.markdown(f"""
180
+ <img src="data:image/png;base64,{image_b64}" class="floating-image" alt="Floating image">
181
+ """, unsafe_allow_html=True)
182
 
183
  uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
184
 
185
  if uploaded_file is not None:
186
+ # Read the entire file as text first
187
+ uploaded_file.seek(0)
188
+ content = uploaded_file.read().decode('utf-8')
189
+
190
+ # Split into lines (handle both \n and \r\n)
191
+ lines = content.replace('\r\n', '\n').replace('\r', '\n').strip().split('\n')
192
+ # Filter out empty lines - only keep lines with actual content
193
+ lines = [line for line in lines if line.strip()]
194
+ data = [line.split(',') for line in lines]
195
+
196
+ # Create DataFrame from parsed data
197
+ df = pd.DataFrame(data)
198
+
199
  words, chars_list, char_positions, char_connections, word_positions, line_word_map = analyze_csv(df)
200
 
201
  st.subheader("Basic Statistics")
 
224
  char_bigrams[bigram] += 1
225
 
226
  char_bigram_df = pd.DataFrame([
227
+ {'Bigram': ''.join(str(c) for c in bigram),
228
+ 'Char1': str(bigram[0]),
229
+ 'Char2': str(bigram[1]),
230
+ 'Count': int(count)}
231
  for bigram, count in char_bigrams.most_common(30)
232
  ])
233
  st.dataframe(char_bigram_df)
 
243
  char_trigrams[trigram] += 1
244
 
245
  char_trigram_df = pd.DataFrame([
246
+ {'Trigram': ''.join(str(c) for c in trigram), 'Count': int(count)}
247
  for trigram, count in char_trigrams.most_common(30)
248
  ])
249
  st.dataframe(char_trigram_df)
 
256
  word_bigrams[bigram] += 1
257
 
258
  word_bigram_df = pd.DataFrame([
259
+ {'Word1': str(bigram[0]), 'Word2': str(bigram[1]), 'Count': int(count)}
260
  for bigram, count in word_bigrams.most_common(20)
261
  ])
262
  st.dataframe(word_bigram_df)
 
269
  word_trigrams[trigram] += 1
270
 
271
  word_trigram_df = pd.DataFrame([
272
+ {'Word1': str(trigram[0]),
273
+ 'Word2': str(trigram[1]),
274
+ 'Word3': str(trigram[2]),
275
+ 'Count': int(count)}
276
  for trigram, count in word_trigrams.most_common(20)
277
  ])
278
  st.dataframe(word_trigram_df)
 
423
  st.subheader("Overall Character Frequency")
424
  all_chars_flat = [char for chars in chars_list for char in chars]
425
  char_freq = Counter(all_chars_flat)
426
+ total_chars = len(all_chars_flat)
427
 
428
  fig_freq = plt.figure(figsize=(12, 6))
429
  char_freq_df = pd.DataFrame(char_freq.most_common(), columns=['Character', 'Count'])
430
+ char_freq_df['Percentage'] = (char_freq_df['Count'] / total_chars * 100).round(2)
431
  plt.bar(char_freq_df['Character'], char_freq_df['Count'])
432
  plt.title("Character Frequency Distribution")
433
  plt.xlabel("Character")
 
435
  plt.xticks(rotation=45)
436
  st.pyplot(fig_freq)
437
  st.dataframe(char_freq_df)
438
+ st.markdown(get_download_link_csv(char_freq_df, "character_frequency.csv"), unsafe_allow_html=True)
439
 
440
  # Character Position Heatmap
441
  st.subheader("Character Position Heatmap")
 
533
  ngrams[ngram] += 1
534
 
535
  ngram_df = pd.DataFrame([
536
+ {'Pattern': ''.join(str(c) for c in ngram),
537
+ 'Count': int(count),
538
+ 'Percentage': f"{count/len(chars_list)*100:.2f}%"}
539
  for ngram, count in ngrams.most_common(30)
540
  ])
541
  st.dataframe(ngram_df)