Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,26 @@ import networkx as nx
|
|
| 10 |
|
| 11 |
st.set_page_config(layout="wide")
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def parse_voynich_word(word):
|
| 14 |
"""Parse a Voynich word into individual characters - NO assumptions about digraphs"""
|
| 15 |
if not word or word.strip() == '':
|
|
@@ -143,12 +163,39 @@ def get_download_link_csv(df, filename):
|
|
| 143 |
|
| 144 |
st.title("Voynich Manuscript Analyzer")
|
| 145 |
st.write("Upload your CSV file to discover potential patterns and character distributions.")
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
| 149 |
|
| 150 |
if uploaded_file is not None:
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
words, chars_list, char_positions, char_connections, word_positions, line_word_map = analyze_csv(df)
|
| 153 |
|
| 154 |
st.subheader("Basic Statistics")
|
|
@@ -177,7 +224,10 @@ if uploaded_file is not None:
|
|
| 177 |
char_bigrams[bigram] += 1
|
| 178 |
|
| 179 |
char_bigram_df = pd.DataFrame([
|
| 180 |
-
{'Bigram': ''.join(
|
|
|
|
|
|
|
|
|
|
| 181 |
for bigram, count in char_bigrams.most_common(30)
|
| 182 |
])
|
| 183 |
st.dataframe(char_bigram_df)
|
|
@@ -193,7 +243,7 @@ if uploaded_file is not None:
|
|
| 193 |
char_trigrams[trigram] += 1
|
| 194 |
|
| 195 |
char_trigram_df = pd.DataFrame([
|
| 196 |
-
{'Trigram': ''.join(trigram), 'Count': count}
|
| 197 |
for trigram, count in char_trigrams.most_common(30)
|
| 198 |
])
|
| 199 |
st.dataframe(char_trigram_df)
|
|
@@ -206,7 +256,7 @@ if uploaded_file is not None:
|
|
| 206 |
word_bigrams[bigram] += 1
|
| 207 |
|
| 208 |
word_bigram_df = pd.DataFrame([
|
| 209 |
-
{'Word1': bigram[0], 'Word2': bigram[1], 'Count': count}
|
| 210 |
for bigram, count in word_bigrams.most_common(20)
|
| 211 |
])
|
| 212 |
st.dataframe(word_bigram_df)
|
|
@@ -219,7 +269,10 @@ if uploaded_file is not None:
|
|
| 219 |
word_trigrams[trigram] += 1
|
| 220 |
|
| 221 |
word_trigram_df = pd.DataFrame([
|
| 222 |
-
{'Word1': trigram[0],
|
|
|
|
|
|
|
|
|
|
| 223 |
for trigram, count in word_trigrams.most_common(20)
|
| 224 |
])
|
| 225 |
st.dataframe(word_trigram_df)
|
|
@@ -370,9 +423,11 @@ if uploaded_file is not None:
|
|
| 370 |
st.subheader("Overall Character Frequency")
|
| 371 |
all_chars_flat = [char for chars in chars_list for char in chars]
|
| 372 |
char_freq = Counter(all_chars_flat)
|
|
|
|
| 373 |
|
| 374 |
fig_freq = plt.figure(figsize=(12, 6))
|
| 375 |
char_freq_df = pd.DataFrame(char_freq.most_common(), columns=['Character', 'Count'])
|
|
|
|
| 376 |
plt.bar(char_freq_df['Character'], char_freq_df['Count'])
|
| 377 |
plt.title("Character Frequency Distribution")
|
| 378 |
plt.xlabel("Character")
|
|
@@ -380,6 +435,7 @@ if uploaded_file is not None:
|
|
| 380 |
plt.xticks(rotation=45)
|
| 381 |
st.pyplot(fig_freq)
|
| 382 |
st.dataframe(char_freq_df)
|
|
|
|
| 383 |
|
| 384 |
# Character Position Heatmap
|
| 385 |
st.subheader("Character Position Heatmap")
|
|
@@ -477,7 +533,9 @@ if uploaded_file is not None:
|
|
| 477 |
ngrams[ngram] += 1
|
| 478 |
|
| 479 |
ngram_df = pd.DataFrame([
|
| 480 |
-
{'Pattern': ''.join(
|
|
|
|
|
|
|
| 481 |
for ngram, count in ngrams.most_common(30)
|
| 482 |
])
|
| 483 |
st.dataframe(ngram_df)
|
|
|
|
| 10 |
|
| 11 |
st.set_page_config(layout="wide")
|
| 12 |
|
| 13 |
+
# Add custom CSS for floating image
|
| 14 |
+
st.markdown("""
|
| 15 |
+
<style>
|
| 16 |
+
.floating-image {
|
| 17 |
+
position: fixed;
|
| 18 |
+
bottom: 20px;
|
| 19 |
+
right: 20px;
|
| 20 |
+
z-index: 9999;
|
| 21 |
+
width: 150px;
|
| 22 |
+
height: auto;
|
| 23 |
+
border-radius: 10px;
|
| 24 |
+
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3);
|
| 25 |
+
transition: transform 0.3s ease;
|
| 26 |
+
}
|
| 27 |
+
.floating-image:hover {
|
| 28 |
+
transform: scale(1.1);
|
| 29 |
+
}
|
| 30 |
+
</style>
|
| 31 |
+
""", unsafe_allow_html=True)
|
| 32 |
+
|
| 33 |
def parse_voynich_word(word):
|
| 34 |
"""Parse a Voynich word into individual characters - NO assumptions about digraphs"""
|
| 35 |
if not word or word.strip() == '':
|
|
|
|
| 163 |
|
| 164 |
st.title("Voynich Manuscript Analyzer")
|
| 165 |
st.write("Upload your CSV file to discover potential patterns and character distributions.")
|
| 166 |
+
|
| 167 |
+
# Add floating image uploader
|
| 168 |
+
floating_image_file = st.file_uploader("Upload a floating image (optional)",
|
| 169 |
+
type=['png', 'jpg', 'jpeg', 'gif'],
|
| 170 |
+
key="floating_image")
|
| 171 |
+
|
| 172 |
+
if floating_image_file is not None:
|
| 173 |
+
# Convert image to base64 for embedding
|
| 174 |
+
import base64
|
| 175 |
+
image_bytes = floating_image_file.read()
|
| 176 |
+
image_b64 = base64.b64encode(image_bytes).decode()
|
| 177 |
+
|
| 178 |
+
# Display floating image
|
| 179 |
+
st.markdown(f"""
|
| 180 |
+
<img src="data:image/png;base64,{image_b64}" class="floating-image" alt="Floating image">
|
| 181 |
+
""", unsafe_allow_html=True)
|
| 182 |
|
| 183 |
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
| 184 |
|
| 185 |
if uploaded_file is not None:
|
| 186 |
+
# Read the entire file as text first
|
| 187 |
+
uploaded_file.seek(0)
|
| 188 |
+
content = uploaded_file.read().decode('utf-8')
|
| 189 |
+
|
| 190 |
+
# Split into lines (handle both \n and \r\n)
|
| 191 |
+
lines = content.replace('\r\n', '\n').replace('\r', '\n').strip().split('\n')
|
| 192 |
+
# Filter out empty lines - only keep lines with actual content
|
| 193 |
+
lines = [line for line in lines if line.strip()]
|
| 194 |
+
data = [line.split(',') for line in lines]
|
| 195 |
+
|
| 196 |
+
# Create DataFrame from parsed data
|
| 197 |
+
df = pd.DataFrame(data)
|
| 198 |
+
|
| 199 |
words, chars_list, char_positions, char_connections, word_positions, line_word_map = analyze_csv(df)
|
| 200 |
|
| 201 |
st.subheader("Basic Statistics")
|
|
|
|
| 224 |
char_bigrams[bigram] += 1
|
| 225 |
|
| 226 |
char_bigram_df = pd.DataFrame([
|
| 227 |
+
{'Bigram': ''.join(str(c) for c in bigram),
|
| 228 |
+
'Char1': str(bigram[0]),
|
| 229 |
+
'Char2': str(bigram[1]),
|
| 230 |
+
'Count': int(count)}
|
| 231 |
for bigram, count in char_bigrams.most_common(30)
|
| 232 |
])
|
| 233 |
st.dataframe(char_bigram_df)
|
|
|
|
| 243 |
char_trigrams[trigram] += 1
|
| 244 |
|
| 245 |
char_trigram_df = pd.DataFrame([
|
| 246 |
+
{'Trigram': ''.join(str(c) for c in trigram), 'Count': int(count)}
|
| 247 |
for trigram, count in char_trigrams.most_common(30)
|
| 248 |
])
|
| 249 |
st.dataframe(char_trigram_df)
|
|
|
|
| 256 |
word_bigrams[bigram] += 1
|
| 257 |
|
| 258 |
word_bigram_df = pd.DataFrame([
|
| 259 |
+
{'Word1': str(bigram[0]), 'Word2': str(bigram[1]), 'Count': int(count)}
|
| 260 |
for bigram, count in word_bigrams.most_common(20)
|
| 261 |
])
|
| 262 |
st.dataframe(word_bigram_df)
|
|
|
|
| 269 |
word_trigrams[trigram] += 1
|
| 270 |
|
| 271 |
word_trigram_df = pd.DataFrame([
|
| 272 |
+
{'Word1': str(trigram[0]),
|
| 273 |
+
'Word2': str(trigram[1]),
|
| 274 |
+
'Word3': str(trigram[2]),
|
| 275 |
+
'Count': int(count)}
|
| 276 |
for trigram, count in word_trigrams.most_common(20)
|
| 277 |
])
|
| 278 |
st.dataframe(word_trigram_df)
|
|
|
|
| 423 |
st.subheader("Overall Character Frequency")
|
| 424 |
all_chars_flat = [char for chars in chars_list for char in chars]
|
| 425 |
char_freq = Counter(all_chars_flat)
|
| 426 |
+
total_chars = len(all_chars_flat)
|
| 427 |
|
| 428 |
fig_freq = plt.figure(figsize=(12, 6))
|
| 429 |
char_freq_df = pd.DataFrame(char_freq.most_common(), columns=['Character', 'Count'])
|
| 430 |
+
char_freq_df['Percentage'] = (char_freq_df['Count'] / total_chars * 100).round(2)
|
| 431 |
plt.bar(char_freq_df['Character'], char_freq_df['Count'])
|
| 432 |
plt.title("Character Frequency Distribution")
|
| 433 |
plt.xlabel("Character")
|
|
|
|
| 435 |
plt.xticks(rotation=45)
|
| 436 |
st.pyplot(fig_freq)
|
| 437 |
st.dataframe(char_freq_df)
|
| 438 |
+
st.markdown(get_download_link_csv(char_freq_df, "character_frequency.csv"), unsafe_allow_html=True)
|
| 439 |
|
| 440 |
# Character Position Heatmap
|
| 441 |
st.subheader("Character Position Heatmap")
|
|
|
|
| 533 |
ngrams[ngram] += 1
|
| 534 |
|
| 535 |
ngram_df = pd.DataFrame([
|
| 536 |
+
{'Pattern': ''.join(str(c) for c in ngram),
|
| 537 |
+
'Count': int(count),
|
| 538 |
+
'Percentage': f"{count/len(chars_list)*100:.2f}%"}
|
| 539 |
for ngram, count in ngrams.most_common(30)
|
| 540 |
])
|
| 541 |
st.dataframe(ngram_df)
|