Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from datasets import load_dataset | |
| import json | |
| from wordcloud import WordCloud | |
| import matplotlib.pyplot as plt | |
| import networkx as nx | |
| from pyvis.network import Network | |
| import streamlit.components.v1 as components | |
# main layout
# Pixel height handed to the posts table.
HEIGHT = 800

# Page configuration is issued before any other Streamlit element.
st.set_page_config(layout="wide")
st.title("Reddit mental map 🧠")

# Three columns; the third is given twice the weight of the other two.
column_weights = [1, 1, 2]
col1, col2, col3 = st.columns(column_weights)

# Reserve three stacked containers in the middle column, created top to
# bottom, so that they can be filled in later.
with col2:
    upper_panel, middle_panel, lower_panel = (st.container() for _ in range(3))
# sidebar: description of the app, the condition picker, and references
st.sidebar.title("Reddit mental map 🧠")
st.sidebar.write("This app is a mental map of Reddit posts related to:")
st.sidebar.markdown(
    """
- Attention-deficit/hyperactivity disorder (ADHD)
- Aspergers
- Depression
- Obsessive-compulsive disorder (OCD)
- Post-traumatic stress disorder (PTSD)
"""
)
st.sidebar.write(
    "The map aims to display a glimpse of :red-background[personal point of views of people who navigate through their mental wellbeing journey]."
)

# The selected value drives which posts and which JSON file are loaded.
st.sidebar.header("Update mental map ✨")
condition = st.sidebar.selectbox(
    "Select a condition", ["ADHD", "Aspergers", "Depression", "OCD", "PTSD"]
)

# Markdown inline links need "](" with nothing in between; a space there
# makes the text render literally instead of as a link.
st.sidebar.header("References:")
st.sidebar.markdown(
    "Hugging Face datasets: [reddit_mental_health_posts](https://huggingface.co/datasets/solomonk/reddit_mental_health_posts)"
)
st.sidebar.markdown(
    "Semantic role labeling code adapted from [FS Ndzomga's Medium](https://medium.com/thoughts-on-machine-learning/building-knowledge-graphs-with-spacy-networkx-and-matplotlib-a-glimpse-into-semantic-role-e49c9dbe26b4)"
)
# data loader
# For each selectable condition: the value matched against the "subreddit"
# column, and the JSON file read for that condition.
_CONDITION_SOURCES = {
    "ADHD": ("ADHD", "data/adhd_clean.json"),
    "Aspergers": ("aspergers", "data/aspergers_clean.json"),
    "Depression": ("depression", "data/depression_clean.json"),
    "OCD": ("OCD", "data/ocd_clean.json"),
    "PTSD": ("ptsd", "data/ptsd_clean.json"),
}


@st.cache_data
def _load_posts():
    """Return the "train" split of the Reddit posts dataset as a DataFrame.

    Streamlit re-executes the whole script on every interaction (each row
    selection in the table triggers a rerun), so the result is cached instead
    of being loaded and converted again each time.
    """
    return load_dataset("solomonk/reddit_mental_health_posts")["train"].to_pandas()


def _join_role(results, role):
    """Join every value stored under *role* across *results* with spaces.

    Entries that do not contain *role* are skipped.
    """
    return " ".join(value for d in results if role in d for value in d[role])


subreddit, json_file = _CONDITION_SOURCES[condition]

posts = _load_posts()
df = posts[posts["subreddit"] == subreddit]

# Explicit encoding so the file is read the same way on every platform.
with open(json_file, "r", encoding="utf-8") as f:
    srl_results = json.load(f)

# One space-separated text per role; these feed the word clouds.
subjects = _join_role(srl_results, "subjects")
verbs = _join_role(srl_results, "verbs")
objects = _join_role(srl_results, "objects")
# dataframe
with col1:
    # Post bodies that are only one of these placeholder texts are left out.
    placeholder_texts = ["[removed]", "[deleted]"]
    is_placeholder = df["body"].isin(placeholder_texts)
    body = df["body"][~is_placeholder]

    # Single-row selection that reruns the script; the returned object
    # carries the selected row positions.
    table_options = {
        "use_container_width": True,
        "height": HEIGHT,
        "hide_index": True,
        "on_select": "rerun",
        "selection_mode": "single-row",
    }
    event = st.dataframe(body, **table_options)
# word cloud
# Words passed to WordCloud as its stop-word list.
stopwords = [
    "day", "hour", "hours", "know", "month",
    "talk", "thing", "things", "think", "time",
    "try", "want", "year",
]
def generate_better_wordcloud(data, mask=None):
    """Build a word cloud from *data* and return it on a new Matplotlib figure.

    Args:
        data: Text handed to ``WordCloud.generate_from_text``.
        mask: Optional value forwarded as the ``mask`` argument of ``WordCloud``.

    Returns:
        A newly created figure showing the word cloud with the axes hidden.
    """
    cloud_options = {
        "scale": 3,
        "max_words": 150,
        "colormap": "RdGy",
        "mask": mask,
        "background_color": "white",
        "stopwords": stopwords,
        "collocations": True,
    }
    cloud = WordCloud(**cloud_options).generate_from_text(data)

    # plt.figure() also makes the new figure pyplot's current figure.
    fig = plt.figure()
    axes = fig.add_subplot()
    axes.imshow(cloud)
    axes.axis("off")
    return fig
# One word cloud per semantic role, rendered top to bottom in the panels
# reserved in the middle column.
for panel, heading, role_text in (
    (upper_panel, "Subjects", subjects),
    (middle_panel, "Verbs", verbs),
    (lower_panel, "Objects", objects),
):
    with panel:
        st.subheader(heading)
        st.pyplot(generate_better_wordcloud(role_text))
# network
def build_and_plot_knowledge_graph_pyvis(result):
    """Draw the subject-to-object graph of one result on the current figure.

    Every subject is linked to every object and every indirect object, and
    each edge is labelled with a verb. Despite the function name, drawing is
    done with NetworkX on Matplotlib; no pyvis network is built because
    nothing ever consumed one.

    Args:
        result: Mapping with the optional keys ``"subjects"``, ``"verbs"``,
            ``"objects"`` and ``"indirect_objects"``, each holding an iterable
            of node/label values. A missing or null key is treated as empty.

    Returns:
        The current pyplot figure, onto which the graph was drawn. The figure
        is not cleared here; callers that want a clean canvas clear it first.
    """
    G = nx.DiGraph()
    # `or ()` covers both an absent key and an explicit null value.
    subjects = result.get("subjects") or ()
    verbs = result.get("verbs") or ()
    objects = result.get("objects") or ()
    indirect_objects = result.get("indirect_objects") or ()

    # NOTE: a DiGraph keeps a single edge per (source, target) pair, so when
    # there are several verbs the label of each edge ends up being the last
    # verb; with no verbs at all, no edges are added.
    for subject in subjects:
        for verb in verbs:
            for obj in objects:
                G.add_edge(subject, obj, label=verb)
            for ind_obj in indirect_objects:
                G.add_edge(subject, ind_obj, label=verb)

    # Fixed seed so the same result is always laid out the same way.
    pos = nx.spring_layout(G, seed=42, k=0.5, iterations=50)
    nx.draw(
        G,
        pos,
        with_labels=True,
        node_color="#FF746C",
        node_size=2000,
        font_size=12,
        font_color="black",
        font_weight="normal",
        arrows=True,
    )
    edge_labels = nx.get_edge_attributes(G, "label")
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

    return plt.gcf()
# mental map of the selected post
with col3:
    st.subheader("Mental map")
    st.write(
        "This is a 2D knowledge graph from simple :red-background[semantic role labeling] of the reddit post using spaCy, NetworkX, and Matplotlib. :red-background[Tick the box next to the row of interest to display the mental map of the individual post]. The graph shows the relationship between the subject, verb, and object at singular level, to complement the full-level overview of the word clouds. It takes a moment to load the data and if the image does not show, it is because some of the posts are deleted or removed in the original dataset."
    )

    # Nothing is drawn until a row has been ticked in the table.
    selected_rows = event.selection.rows
    if selected_rows:
        person = int(selected_rows[0])
        # The graph is drawn on pyplot's current figure, so start from a
        # clean one.
        plt.clf()
        try:
            fign = build_and_plot_knowledge_graph_pyvis(srl_results[person])
        except (IndexError, KeyError):
            # Only the expected lookup failures are handled: no entry for the
            # selected row, or an entry without the required fields. Any other
            # error is left to surface instead of being hidden.
            st.info("No mental map is available for the selected post.")
        else:
            st.pyplot(fign)