Spaces:

praisethefool
/

dca-editor

Sleeping

App Files Files Community

praisethefool committed on Apr 14, 2025

Commit

b467d9f

verified ·

1 Parent(s): 8949568

Create app.py

Browse files

Files changed (1) hide show

app.py +342 -0

app.py ADDED Viewed

	@@ -0,0 +1,342 @@

+from datasets import load_dataset
+import gradio as gr
+import pandas as pd
+dataset = load_dataset(
+    "praisethefool/dca-distroid_digest-issue_44",
+    keep_default_na=False)
+df = pd.DataFrame(dataset['train'])
+for x in df.columns:
+  if 'fields' in x:
+    y = x.replace('fields.', '')
+    y = y.lower()
+    df.rename({x:y}, axis = 'columns', inplace=True)
+  else:
+    y = x.lower()
+    df.rename({x:y}, axis = 'columns', inplace=True)
+description = """
+# Overview
+This Gradio demo was developed to show how end-users could be empowered to create their own personalized feeds by customizing algorithmic recommendation systems.
+In this demo, I focused on how readers of the Distroid Digest could personalize the [Distroid Digest](https://distroid.substack.com/) to fit their needs by customizing the Distroid Curator Algorithm (DCA),
+the (planned) curation algorithm used by curators for curating Works collected in the Distroid Catalogue Knowledge Graph (DCKG) into the Distroid Digest newsletter issues.
+The DCA’s objective is to produce a ranked feed of items (here being Works in the DCKG) that increase understanding of frontier information.
+More specifically, increasing understanding of how to diagnose and improve the human-technology relationship.
+We assume that items with higher scores have the potential to provide readers with a greater understanding of frontier information than items with a lower score.
+For DCA Version 0.1 (V0.1), Works are rated based on the following six quality signals:
+1. ELI5: "The ability to explain complex topics in lay-man's terms",
+2. Implications: "The real, imagined, or theorized positive, neutral, or negative outcomes (or impacts) of frontier information (or discoveries, technologies, and cultures) on society, environment, economy, or in other areas." ,
+3. Idea Machine Intersectionality: "The number of idea machines the work is classified under.",
+4. Novelty: "New knowledge that moves the knowledge frontier.",
+5. Informative: "The content improved my understanding of a topic.", and
+6. Evergreen: "Knowledge that is applicable regardless of time or location".
+# Dataset
+I used the Works curated in [Distroid Digest Issue 44](https://distroid.substack.com/p/digest-issue-44-how-bluesky-works) for this demo.
+# Objective Function
+In this demo, the objective function is a weighted sum formula, where weights
+between zero and twenty (0-20) are applied to the ratings of each signal.
+# Functionality
+1. Users can customize the feed by setting the weights from zero to twenty (0-20) for each marker described above.
+2. Users can set a minimum DCA score for Works to be added to their feed.
+# Tips & Tricks
+If you think a signal should not be included in your feed, you can set that marker's weight to zero (0).
+# Learn more
+You can read more about the early work on the DCA [here](https://ledgerback.pubpub.org/pub/9ibht7wp/release/8).
+For background information on recommender systems, please read [Recommender Systems 101](https://kgi.georgetown.edu/wp-content/uploads/2025/02/Recommender-Systems-101.pdf).
+# Related Work
+Related work on creating alternative feeds and newsletters can be found below:
+1. [Fedi-Feed](https://foryoufeed.vercel.app/login)
+2. [News Minimalist](https://www.newsminimalist.com/)
+3. [Building a Social Media Algorithm That Actually Promotes Societal Values](https://hai.stanford.edu/news/building-social-media-algorithm-actually-promotes-societal-values)
+4. [PDN Pro-Social with Smitha Milli: Ranking by User Value](https://www.youtube.com/watch?v=6ltsAT5RUrI)
+# Outputs
+1. DCA Objective Function: The current objective function after the parameters are set.
+2. DCA Scores: A table of Works sorted by their DCA score in the Score column. Also includes the Work's title and url.
+3. Scores per Signal: A table showing the scores for each signal after setting the weights.
+# Caveats
+1. The Works are pre-rated, so you cannot edit the ratings per marker.
+2. The Weights and Minimum Score ranges are pre-set.
+3. In the Scores per Signal tab, Idea Machine Interserctionality has been shortened to 'imi'.
+"""
+def grad_wg_int(
+    w_nov, #Novelty Wgt
+    w_eve, #Evergreen,
+    w_inf, #Informative,
+    w_imi, #Implications
+    w_eli, #ELI5
+    w_imp, #Implications
+    min_score
+  ):
+  muse = []
+  weights = {
+      "w_nov": w_nov,
+      "w_eve": w_eve,
+      "w_inf": w_inf,
+      "w_imi": w_imi,
+      "w_eli": w_eli,
+      "w_imp": w_imp,
+  }
+  dc_algo_mk = zip(df['novelty'],
+                  df['evergreen'],
+                  df['informative'],
+                  df['idea machine intersectionality'],
+                  df['eli5'],
+                  df['implications'],
+                  df['title'],
+                  df['url'],
+                  )
+  for m_nov, m_eve, m_inf, m_imi, m_eli, m_imp, title, url in dc_algo_mk:
+    score_nov = weights['w_nov'] * int(m_nov)
+    score_eve = weights['w_eve'] * int(m_eve)
+    score_inf = weights['w_inf'] * int(m_inf)
+    score_imi = weights['w_imi'] * int(m_imi)
+    score_eli = weights['w_eli'] * int(m_eli)
+    score_imp = weights['w_imp'] * int(m_imp)
+    # need to save the weight and score for each marker into
+    # a table with key included
+    rank_sum = (score_nov +
+                score_eve +
+                score_inf +
+                score_imi +
+                score_eli +
+                score_imp)
+    rank_sum = round(float(rank_sum), 2)
+    score_rank = {
+        "Score": rank_sum,
+        "Title": title,
+        'URL': url,
+    }
+    muse.append(score_rank)
+  tug = pd.DataFrame(muse)
+  tug = tug.query(f"Score >= {min_score}")
+  tug.sort_values('Score', ascending=False, inplace=True)
+  return tug
+def grad_wg_int_scr(
+    w_nov, #Novelty Wgt
+    w_eve, #Evergreen,
+    w_inf, #Informative,
+    w_imi, #Idea Machine Intersectionality
+    w_eli, #ELI5
+    w_imp, #Implications
+  ):
+  weights = {
+      "w_nov": w_nov,
+      "w_eve": w_eve,
+      "w_inf": w_inf,
+      "w_imi": w_imi,
+      "w_eli": w_eli,
+      "w_imp": w_imp,
+  }
+  df_ = df.copy()
+  df_['novelty'] = df['novelty'] * weights['w_nov']
+  df_['evergreen'] = weights['w_eve'] * df['evergreen']
+  df_['informative'] = weights['w_inf'] * df['informative']
+  df_['idea machine intersectionality'] = df['idea machine intersectionality'] * weights['w_imi']
+  df_['eli5'] = df['eli5'] * weights['w_eli']
+  df_['implications'] = df['implications'] * weights['w_imp']
+  df_.rename({'idea machine intersectionality': 'imi'}, axis='columns', inplace=True)
+  df_.drop(['url', 'likeable'], axis='columns', inplace=True)
+  df_ = df_.round(1)
+  return df_
+def grad_wg_int_form(
+    w_nov, #Novelty Wgt
+    w_eve, #Evergreen,
+    w_inf, #Informative,
+    w_imi, #Implications
+    w_eli, #ELI5
+    w_imp, #Implications
+  ):
+  weights = {
+      "w_nov": w_nov,
+      "w_eve": w_eve,
+      "w_inf": w_inf,
+      "w_imi": w_imi,
+      "w_eli": w_eli,
+      "w_imp": w_imp,
+  }
+  formula_a = f"""
+  ({weights['w_nov']} * Novelty) + ({weights['w_eve']} * Evergreen) + \n\n
+  ({weights['w_inf']} * Informative) + ({weights['w_eli']} * ELI5) + \n\n
+  ({weights['w_imp']} * Implications) + ({weights['w_imi']} * Idea Machine Intersectionality)
+  """
+  return formula_a
+with gr.Blocks(fill_width=True) as demo:
+    gr.Markdown('# Welcome to the DCA Personalized Feed Demo')
+    with gr.Row():
+      with gr.Accordion():
+        gr.Markdown(description)
+    with gr.Row():
+      with gr.Sidebar():
+        gr.Markdown("### Customize")
+        tune_eli = gr.Slider(0.00, 20.00, value=1, label="ELI5 Weight", info="Choose between 0 and 20")
+        tune_evg = gr.Slider(0.00, 20.00, value=1, label="Evergreen Weight", info="Choose between 0 and 20")
+        tune_inf = gr.Slider(0.00, 20.00, value=1, label="Informative Weight", info="Choose between 0 and 20")
+        tune_imp = gr.Slider(0.00, 20.00, value=1, label="Implications Weight", info="Choose between 0 and 20")
+        tune_nov = gr.Slider(0.00, 20.00, value=1, label="Novelty Weight", info="Choose between 0 and 20")
+        tune_imi = gr.Slider(0.00, 20.00, value=1, label="Idea Machine Intersectionality Weight", info="Choose between 0 and 20")
+        tune_min = gr.Slider(0.00, 50.00, value=1, label="Minimum DCA Score", info="Choose between 0 and 50")
+        text_button = gr.Button(value="Set Parameters")
+        clear_button = gr.ClearButton(value="Clear Parameters")
+      with gr.Column(scale=3):
+        form_plot = gr.Label(label="DCA Objective Function")
+        text_button.click(grad_wg_int_form,
+                        inputs=[
+                            tune_nov,
+                            tune_evg,
+                            tune_inf,
+                            tune_eli,
+                            tune_imi,
+                            tune_imp,
+                        ],
+                        outputs=[form_plot])
+        with gr.Tab("Scores per Signal"):
+          output_df = gr.DataFrame(
+              wrap = True,
+              show_search='filter',
+              show_copy_button = True,
+              show_fullscreen_button=True )
+          text_button.click(
+              grad_wg_int_scr,
+              inputs=[
+                tune_nov,
+                tune_evg,
+                tune_inf,
+                tune_eli,
+                tune_imi,
+                tune_imp,
+              ],
+              outputs=[output_df])
+        with gr.Tab("Feed"):
+          output_df = gr.DataFrame(
+              wrap = True,
+              show_search='filter',
+              show_copy_button = True,
+              show_fullscreen_button=True )
+          text_button.click(
+              grad_wg_int,
+              inputs=[
+                tune_nov,
+                tune_evg,
+                tune_inf,
+                tune_eli,
+                tune_imi,
+                tune_imp,
+                tune_min,
+              ],
+              outputs=[output_df])
+          clear_button.add([
+              tune_eli,
+              tune_evg,
+              tune_inf,
+              tune_imp,
+              tune_nov,
+              tune_imi,
+              tune_min,
+          ])
+if __name__ == "__main__":
+    demo.launch()