Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,10 @@ import os
|
|
| 10 |
from wordcloud import WordCloud
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
import pkg_resources
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 15 |
font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
|
|
@@ -89,6 +93,40 @@ def get_cached_embeddings(text, tokenizer, model):
|
|
| 89 |
"""Cache embeddings to avoid recomputation"""
|
| 90 |
return get_embedding_for_text(text, tokenizer, model)
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
def split_text(text, max_length=512):
|
| 94 |
"""Split text into chunks of maximum token length while preserving word boundaries."""
|
|
@@ -430,7 +468,7 @@ if uploaded_file is not None:
|
|
| 430 |
if summaries:
|
| 431 |
st.success("Analysis complete!")
|
| 432 |
|
| 433 |
-
tab1, tab2 = st.tabs(["Country Summaries", "Global Topics"])
|
| 434 |
|
| 435 |
with tab1:
|
| 436 |
for summary in summaries:
|
|
@@ -463,7 +501,10 @@ if uploaded_file is not None:
|
|
| 463 |
words = topic_model.get_topic(row['Topic'])
|
| 464 |
topic_name = " | ".join([word for word, _ in words[:5]])
|
| 465 |
st.write(f"• Topic {row['Topic']}: {topic_name} ({row['Count']} poems)")
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
| 467 |
except Exception as e:
|
| 468 |
st.error(f"Error processing file: {str(e)}")
|
| 469 |
|
|
|
|
| 10 |
from wordcloud import WordCloud
|
| 11 |
import matplotlib.pyplot as plt
|
| 12 |
import pkg_resources
|
| 13 |
+
import folium
|
| 14 |
+
from folium.plugins import HeatMap
|
| 15 |
+
import country_converter as coco
|
| 16 |
+
from streamlit_folium import folium_static
|
| 17 |
|
| 18 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 19 |
font_path = os.path.join(current_dir, "ArabicR2013-J25x.ttf")
|
|
|
|
| 93 |
"""Cache embeddings to avoid recomputation"""
|
| 94 |
return get_embedding_for_text(text, tokenizer, model)
|
| 95 |
|
| 96 |
+
# Approximate country centroids as (latitude, longitude), keyed by ISO 3166-1
# alpha-2 code. country_converter only translates between country naming and
# classification schemes and carries no coordinates, so marker positions are
# taken from this table instead. Values are rough and meant for marker
# placement only; extend the table to cover additional countries.
_COUNTRY_CENTROIDS = {
    'AE': (23.4, 53.8),
    'BH': (26.0, 50.55),
    'DJ': (11.8, 42.6),
    'DZ': (28.0, 3.0),
    'EG': (26.8, 30.8),
    'IQ': (33.2, 43.7),
    'JO': (31.0, 36.5),
    'KM': (-11.65, 43.33),
    'KW': (29.3, 47.5),
    'LB': (33.85, 35.86),
    'LY': (26.3, 17.2),
    'MA': (31.8, -7.1),
    'MR': (20.3, -10.3),
    'OM': (21.5, 55.9),
    'PS': (31.95, 35.23),
    'QA': (25.35, 51.18),
    'SA': (23.9, 45.1),
    'SD': (15.5, 30.2),
    'SO': (5.15, 46.2),
    'SY': (34.8, 39.0),
    'TN': (33.9, 9.5),
    'YE': (15.55, 48.5),
}


def create_theme_map(summaries, topic_model):
    """Create an interactive map showing theme distributions across countries.

    One red circle marker is placed per country; its popup lists up to five
    of that country's top themes with their counts.

    Args:
        summaries: Iterable of mappings, each providing a 'country' name and
            a 'top_topics' sequence whose items have 'topic' and 'count' keys.
        topic_model: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        A folium.Map centered on [25, 45] at zoom level 4 with the markers
        added. Entries that are malformed, or whose country cannot be
        resolved to a known location, are left off the map and reported in a
        single Streamlit warning.
    """
    # Local import so the block does not depend on the file's import header.
    import html

    # Create a base map centered on the Arab world
    m = folium.Map(location=[25, 45], zoom_start=4)
    cc = coco.CountryConverter()
    skipped = []

    for summary in summaries:
        country = None
        try:
            country = summary['country']
            # With a single input name, convert() is expected to return a bare
            # string ('not found' when unmatched) -- TODO confirm against the
            # installed country_converter version.
            country_iso = cc.convert(names=[country], to='ISO2')
            theme_lines = '<br>'.join(
                f"• {html.escape(str(topic['topic']))}: "
                f"{html.escape(str(topic['count']))}"
                for topic in summary['top_topics'][:5]
            )
        except (KeyError, IndexError, TypeError, ValueError):
            # One malformed entry must not prevent the other countries from
            # being drawn.
            skipped.append(str(country) if country is not None else "(unnamed entry)")
            continue

        # An ambiguous match can come back as a list; treat it as unresolved.
        coords = _COUNTRY_CENTROIDS.get(country_iso) if isinstance(country_iso, str) else None
        if coords is None:
            skipped.append(str(country))
            continue

        # Country and topic text originate from the uploaded file, so escape
        # them before embedding in the popup HTML.
        popup_content = (
            f"<h4>{html.escape(str(country))}</h4>"
            f"<b>Top Themes:</b><br>"
            f"{theme_lines}"
        )

        # Add marker for each country
        folium.CircleMarker(
            location=list(coords),
            radius=20,
            popup=folium.Popup(popup_content, max_width=300),
            color='red',
            fill=True,
            fill_opacity=0.7,
        ).add_to(m)

    if skipped:
        st.warning(f"Could not place on map: {', '.join(skipped)}")

    return m
|
| 130 |
|
| 131 |
def split_text(text, max_length=512):
|
| 132 |
"""Split text into chunks of maximum token length while preserving word boundaries."""
|
|
|
|
| 468 |
if summaries:
|
| 469 |
st.success("Analysis complete!")
|
| 470 |
|
| 471 |
+
tab1, tab2, tab3 = st.tabs(["Country Summaries", "Global Topics", "Theme Map"])
|
| 472 |
|
| 473 |
with tab1:
|
| 474 |
for summary in summaries:
|
|
|
|
| 501 |
words = topic_model.get_topic(row['Topic'])
|
| 502 |
topic_name = " | ".join([word for word, _ in words[:5]])
|
| 503 |
st.write(f"• Topic {row['Topic']}: {topic_name} ({row['Count']} poems)")
|
| 504 |
+
with tab3:
|
| 505 |
+
st.subheader("Thematic Distribution Map")
|
| 506 |
+
theme_map = create_theme_map(summaries, topic_model)
|
| 507 |
+
folium_static(theme_map)
|
| 508 |
except Exception as e:
|
| 509 |
st.error(f"Error processing file: {str(e)}")
|
| 510 |
|