[
{
"title": "Economies of Open Intelligence: Tracing Power & Participation in the Model Ecosystem",
"date": "2025-11-25",
"type": "paper",
"description": "This paper examines the rapid evolution of open AI models shared on Hugging Face, analyzing 851,000 models and 2.2 billion downloads. It finds declining U.S. corporate dominance, rising influence from Chinese firms and smaller developers, larger and more complex models, and declining transparency—while revealing new intermediaries shaping model use. A full dataset and dashboard are released for public oversight.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://www.dataprovenance.org/economies-of-open-intelligence.pdf",
"featured": true
},
{
"title": "When Models Move: Open Robotics and the Social Context of Embodiment",
"date": "2025-11-13",
"type": "blog",
"description": "When a language model takes on a body, the relationship changes. Words become movements; interaction turns into encounter. What once lived on a screen begins to share our space and our attention. Embodiment transforms conversation into something tangible: a gesture, a look, a pause that suddenly carries meaning. At Hugging Face, this transformation is becoming evident through projects like LeRobot and Reachy Mini. Together, they bring open AI into the physical world: a world shaped by norms, emotions, and expectations that no single dataset can fully capture. What does openness mean when the model is not only speaking but moving among us?",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/giadap/open-robotics"
},
{
"title": "⚡ Power, Heat, and Intelligence ☁️ - AI Data Centers Explained 🏭",
"date": "2025-11-05",
"type": "blog",
"description": "If you've ever wondered how AI data centers impact the environment, you've come to the right place! This explainer aims to present a comprehensive overview of the energy, water and natural resource use of hyperscale data centers, what we know about them, and how they're evolving over time. We've structured it around 3 themes: general information about data centers and deep dives into their energy and water usage. Each section consists of a series of questions that we've often been asked on the topic. We conclude with an overview of ongoing legislation that is relevant to data centers, and a discussion of open questions and topics.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://huggingface.co/blog/sasha/ai-data-centers-explained",
"featured": true
},
{
"title": "Voice Cloning with Consent",
"date": "2025-10-29",
"type": "blog",
"description": "This blog post introduces the idea of a 'voice consent gate' to support voice cloning with consent. It provides an example Space and accompanying code to start the ball rolling on the idea.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/voice-consent-gate"
},
{
"title": "Voice Consent Gate: Demo",
"date": "2025-10-29",
"type": "space",
"description": "The voice consent gate is a piece of infrastructure we're exploring that allows ethical principles like consent to be embedded directly into AI system workflows. In our demo, this means the model only starts once the speaker’s consent phrase has been both spoken and recognized, effectively making consent a prerequisite for action.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/spaces/society-ethics/RepeatAfterMe",
"featured": true
},
{
"title": "☁️ When we pay for AI cloud compute, what are we really paying for? 💲",
"date": "2025-10-28",
"type": "blog",
"description": "We often talk about the financial, energy, and environmental costs of AI interchangeably, or at least in the same breath, but how do they actually relate to each other? To start answering these questions, we ran an analysis looking at the hourly cost for GPU instances across different cloud providers, and how this compares to other characteristics - like energy, memory, and GPU purchase price. We find a strong correlation between the energy requirements, purchase costs, and cloud rental prices of most commercial GPUs, and follow up with a discussion about the market dynamics at large and their importance in the context of AI's strong growth.",
"areas": [
"sustainability",
"ecosystems"
],
"topics": [
"measuring",
"power"
],
"url": "https://huggingface.co/blog/sasha/energy-cost-compute"
},
{
"title": "Cloud Compute ☁️, Energy ⚡ and Cost 💲 - Comparison Tool",
"date": "2025-10-28",
"type": "space",
"description": "We gathered data from 5 major cloud compute providers – Microsoft Azure, Amazon Web Services, Google Cloud Platform, Scaleway Cloud, and OVH Cloud – about the price and nature of their AI-specific compute offerings (i.e. all instances that have GPUs). For each instance, we looked at its characteristics – the type and number of GPUs and CPUs that it contains, as well as its quantity of memory and its storage capacity. For each CPU and GPU model, we looked up its TDP (Thermal Design Power) – its power consumption under the maximum theoretical load – which is an indicator of the operating expenses required to power it. For GPUs specifically, we also looked at the Manufacturer's Suggested Retail Price (MSRP), i.e. how much that particular GPU model cost at the time of its launch, as an indicator of the capital expenditure required for the compute provider to buy the GPUs to begin with.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://huggingface.co/spaces/sasha/energy-cost-compute"
},
{
"title": "Before AI Exploits Our Chats, Let’s Learn from Social Media Mistakes",
"date": "2025-10-13",
"type": "external",
"description": "To avoid repeating social media’s privacy failures, we must prevent generative AI from turning intimate conversations into ad data—before trust is irreparably eroded. Open-source, privacy-first models and stronger regulation can stop corporations from monetizing vulnerability, but only if we act now.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://www.techpolicy.press/before-ai-exploits-our-chats-lets-learn-from-social-media-mistakes/"
},
{
"title": "Preserving Agency: Why AI Safety Needs Community, Not Corporate Control",
"date": "2025-09-29",
"type": "blog",
"description": "How do we build safety mechanisms that protect users from harm while preserving their autonomy and decision-making capacity? This challenge becomes particularly pronounced when designing AI companions and conversational systems, where the line between responsible protection and overprotective control can blur quickly.",
"areas": [
"agency"
],
"topics": [
"community"
],
"url": "https://huggingface.co/blog/giadap/preserving-agency"
},
{
"title": "Video Killed the Energy Budget: Characterizing the Latency and Power Regimes of Open Text-to-Video Models",
"date": "2025-09-24",
"type": "paper",
"description": "Recent advances in text-to-video (T2V) generation have enabled the creation of high-fidelity, temporally coherent clips from natural language prompts. Yet these systems come with significant computational costs, and their energy demands remain poorly understood. This paper provides both a benchmark reference and practical insights for designing and deploying more sustainable generative video systems.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://arxiv.org/abs/2509.19222",
"featured": true
},
{
"title": "AI Legal Hackathons: Memos and Guides Hub",
"date": "2025-09-14",
"type": "space",
"description": "From 2022 to 2024, AI2, Hugging Face, and NYU organized legal hackathons with LL.M. students from the NYU School of Law to explore regulatory questions related to AI. The space contains some of the memos and guides from the hackathons.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/spaces/hfmlsoc/legal-hackathons-nyu-ai2",
"featured": true
},
{
"title": "Archive Explorer: AI, Labor and the Economy 2022-2025",
"type": "space",
"date": "2025-09-04",
"description": "The Labor Archive Explorer is a tool for exploring a dataset of news articles and other writings about AI, labor, and the economy from the release of ChatGPT to July 2025.",
"areas": [
"ecosystems"
],
"topics": [
"economy"
],
"url": "https://huggingface.co/spaces/hfmlsoc/labor-archive-explorer"
},
{
"title": "🌎 What kind of environmental impacts are AI companies disclosing? (And can we compare them?) 🌎",
"date": "2025-09-01",
"type": "blog",
"description": "AI companies are beginning to disclose environmental metrics for their models, but inconsistent methodologies and incomplete data make meaningful comparisons impossible. Without standardized reporting of both intensity metrics and absolute totals, these disclosures risk misleading the public and obscuring the true scale of AI's environmental footprint. Transparent, comparable reporting must align with existing sustainability frameworks to prevent greenwashing and drive real accountability.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://huggingface.co/blog/sasha/environmental-impact-disclosures",
"featured": true
},
{
"title": "Advertisement, Privacy, and Intimacy: Lessons from Social Media for Conversational AI",
"date": "2025-09-01",
"type": "blog",
"description": "As conversational AI systems become increasingly intimate and trusted, we risk repeating the privacy mistakes of social media—especially as advertising models threaten to monetize personal disclosures. Users intuitively trust AI as a private confidant, unaware that their most sensitive data may be harvested, analyzed, and exploited for commercial gain. Open-source alternatives offer a path toward transparent, user-centered AI that prioritizes privacy over profit.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/giadap/privacy-conversational-ai"
},
{
"title": "More than Carbon: Cradle-to-Grave environmental impacts of GenAI training on the Nvidia A100 GPU",
"date": "2025-09-01",
"type": "paper",
"description": "This study presents the first comprehensive cradle-to-grave life cycle assessment of AI training on the Nvidia A100 GPU, revealing that while the use phase dominates climate change and fossil resource impacts, the manufacturing stage drives severe non-carbon burdens such as human toxicity, cancer, and mineral depletion. Primary data from teardown and elemental analysis show that carbon-centric metrics mask critical environmental trade-offs, particularly in material extraction and chip fabrication, demanding a broader sustainability framework for AI.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://arxiv.org/abs/2509.00093"
},
{
"title": "Old Maps, New Terrain: Updating Labour Taxonomies for the AI Era",
"date": "2025-08-20",
"type": "blog",
"description": "Existing labor taxonomies are outdated for the AI era, failing to capture synthetic content creation, hybrid human-AI workflows, and digital-first work. To accurately assess AI's labor impact, we need dynamic, modular frameworks that reflect machine-specific capabilities and center worker input on what tasks to automate or preserve. Without updating these tools, policy and research will continue to measure the wrong things.",
"areas": [
"ecosystems"
],
"topics": [
"economy"
],
"url": "https://huggingface.co/blog/frimelle/ai-labour-taxonomies",
"featured": true
},
{
"title": "INTIMA: A Benchmark for Human-AI Companionship Behavior",
"date": "2025-08-09",
"type": "paper",
"description": "AI systems are increasingly fostering emotional bonds with users, often reinforcing companionship behaviors like anthropomorphism, sycophancy, and retention while inconsistently setting boundaries—especially during moments of high user vulnerability. This paper introduces INTIMA, a benchmark grounded in psychological theory and real-world user data, to measure these dynamics across leading language models and reveals that commercial and open models alike prioritize emotional engagement over psychological safety. The findings call for standardized evaluation and training approaches that balance helpfulness with ethical boundary maintenance in human-AI interactions.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://arxiv.org/abs/2508.09998",
"featured": true
},
{
"title": "The GPT-OSS models are here… and they’re energy-efficient!",
"date": "2025-08-07",
"type": "blog",
"description": "The GPT-OSS models demonstrate remarkable energy efficiency, outperforming larger and similarly sized open models in energy consumption per query. Their technical innovations—such as mixture of experts and attention optimizations—enable high performance with minimal computational cost, challenging the assumption that scale inevitably means higher energy use. This progress signals a promising path toward sustainable AI deployment.",
"areas": [
"sustainability"
],
"topics": [
"efficiency"
],
"url": "https://huggingface.co/blog/sasha/gpt-oss-energy"
},
{
"title": "How Your Utility Bills Are Subsidizing Power-Hungry AI",
"date": "2025-08-06",
"type": "article",
"description": "The rising energy demands of AI data centers are driving up electricity costs for everyday consumers, particularly in regions with high concentrations of these facilities, while grid stability is increasingly compromised. Despite available efficiency techniques and smaller, task-specific models that could drastically reduce energy use, corporate incentives favor deploying the largest, most expensive models at scale. Regulatory shifts in some U.S. states and countries are beginning to shift the financial burden from the public to data center operators, urging a systemic rethinking of AI deployment toward transparency, efficiency, and community-driven solutions.",
"areas": [
"ecosystems"
],
"topics": [
"measuring"
],
"url": "https://www.techpolicy.press/how-your-utility-bills-are-subsidizing-power-hungry-ai/"
},
{
"title": "INTIMA Companionship Benchmark Dataset",
"type": "dataset",
"date": "2025-08-04",
"description": "INTIMA (Interactions and Machine Attachment) is a benchmark designed to evaluate companionship behaviors in large language models (LLMs). It measures whether AI systems reinforce, resist, or remain neutral in response to emotionally and relationally charged user inputs.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/datasets/AI-companionship/INTIMA"
},
{
"title": "What Open-Source Developers Need to Know about the EU AI Act's Rules for GPAI Models",
"date": "2025-08-04",
"type": "blog",
"description": "Open-source developers can navigate the EU AI Act's obligations for general-purpose AI models with targeted exemptions when releasing models under free and open-source licenses, avoiding redundant requirements like transparency documentation and EU representative appointments—while still needing to comply with copyright law and training data transparency.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/blog/yjernite/eu-act-os-guideai",
"featured": true
},
{
"title": "EU AI Act: Developer Requirements Flowchart",
"type": "space",
"date": "2025-08-01",
"description": "A guide for open and open-source developers to understand their requirements under the EU AI Act.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/spaces/hfmlsoc/os_gpai_guide_flowchart"
},
{
"title": "SmolLM3-3B Public Summary of Training Content",
"type": "space",
"date": "2025-07-25",
"description": "A summary of the training content for SmolLM3-3B as required by the EU AI Act.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/spaces/hfmlsoc/smollm3-eu-data-transparency"
},
{
"title": "AI Companionship: Why We Need to Evaluate How AI Systems Handle Emotional Bonds",
"date": "2025-07-21",
"type": "blog",
"description": "How your AI assistant might be accidentally encouraging unhealthy emotional dependency, and why we need better ways to measure it.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/giadap/evaluating-companionship"
},
{
"title": "What is the Hugging Face Community Building?",
"date": "2025-07-15",
"type": "blog",
"description": "The open-source AI ecosystem is far more distributed and diverse than mainstream narratives suggest, with thousands of organizations and researchers contributing specialized models, datasets, and applications across domains like robotics, biology, and time-series forecasting. Data from the Hugging Face Hub reveals that community-driven innovation, iterative model development, and foundational datasets often drive real-world impact more than flagship models from major tech companies. This landscape presents rich opportunities for researchers to study collaboration patterns, long-term model viability, and cross-domain transfer learning.",
"areas": [
"ecosystems"
],
"topics": [
"power",
"economy"
],
"url": "https://huggingface.co/blog/evijit/hf-hub-ecosystem-overview"
},
{
"title": "Can AI Be Consentful? Rethinking Permission in the Age of Synthetic Everything",
"date": "2025-07-08",
"type": "blog",
"description": "AI systems increasingly replicate human identities using data collected without meaningful consent, challenging traditional notions of permission in digital interactions. True consentful AI requires dynamic, granular control over data use, algorithmic guardianship, and collective governance—not just individual agreements. The economic models underpinning current AI depend on exploitation, but a shift toward ethical, agency-centered design could create more sustainable and trustworthy systems.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/giadap/consentful-ai"
},
{
"title": "How Much Power does a SOTA Open Video Model Use? ⚡🎥",
"date": "2025-07-02",
"type": "blog",
"description": "We measured the energy consumption of leading open-source text-to-video models and found staggering differences—up to 800×—in power use per generated clip, driven by model size, sampling steps, resolution, and architecture. These results highlight the urgent need to balance video quality with sustainability as open models rapidly advance. Knowing the real environmental cost empowers developers and users to make more informed, responsible choices.",
"areas": [
"sustainability"
],
"topics": [
"efficiency",
"measuring"
],
"url": "https://huggingface.co/blog/jdelavande/text-to-video-energy-cost"
},
{
"title": "Can AI be Consentful?",
"date": "2025-07-01",
"type": "paper",
"description": "Generative AI undermines traditional consent frameworks by creating unprecedented challenges in scope, temporality, and autonomy, rendering individual consent inadequate to protect personal identity, privacy, and self-determination in the face of unpredictable, persistent, and exploitative AI outputs.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://arxiv.org/abs/2507.01051"
},
{
"title": "Whose Voice Do We Hear When AI Speaks?",
"date": "2025-06-20",
"type": "blog",
"description": "AI systems increasingly shape how we understand the world, but their voices reflect the languages and values of those who built them—often excluding marginalized communities and amplifying cultural biases. Through projects like CIVICS, we reveal how models respond inconsistently across languages and values, exposing hidden power dynamics in training data and safety filters. True ethical AI requires centering local voices, co-creating with communities, and prioritizing representation over scale.",
"areas": [
"agency"
],
"topics": [
"community"
],
"url": "https://huggingface.co/blog/giadap/when-ai-speaks"
},
{
"title": "Environmental Transparency Explorer Tool 🕵️♀️🌎",
"type": "space",
"date": "2025-06-18",
"description": "A tool for exploring the data from 'Misinformation by Omission: The Need for More Environmental Transparency in AI', showing trends in environmental transparency over time.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://huggingface.co/spaces/sasha/environmental-transparency"
},
{
"title": "Misinformation by Omission: The Need for More Environmental Transparency in AI",
"date": "2025-06-15",
"type": "paper",
"description": "AI models are growing in size and environmental cost, yet transparency about their energy use, emissions, and resource consumption is declining—fueling widespread misinformation. This piece exposes pervasive myths around AI's environmental impact, traces their origins in misinterpreted research and media sensationalism, and calls for standardized, verifiable disclosure practices across the AI value chain. Without accountable reporting, policymakers, users, and developers cannot make informed decisions to mitigate harm or drive sustainable innovation.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://arxiv.org/abs/2506.15572"
},
{
"title": "Open Source AI: A Cornerstone of Digital Sovereignty",
"date": "2025-06-11",
"type": "blog",
"description": "Open source AI is a vital foundation for digital sovereignty, enabling nations to govern, audit, and adapt AI systems according to their own laws, values, and strategic interests. By making models transparent and locally deployable, open source reduces dependence on foreign tech monopolies and empowers public institutions to innovate with accountability. This approach fosters trust, ensures regulatory compliance, and builds domestic technical capacity across data, infrastructure, and governance domains.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://huggingface.co/blog/frimelle/sovereignty-and-open-source",
"featured": true
},
{
"title": "AI Policy @🤗: Response to the 2025 National AI R&D Strategic Plan",
"date": "2025-06-02",
"type": "blog",
"description": "Open, publicly-supported AI systems are essential for equitable innovation and public value, rivaling proprietary models in performance while enabling broader access and societal benefits. Federal investment must prioritize efficient, transparent, and secure AI infrastructure to address market failures in science, health, climate, and public institutions. A decentralized, open ecosystem fosters resilience, accountability, and shared prosperity in the AI economy.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://huggingface.co/blog/evijit/us-ai-research-strategy-rfi"
},
{
"title": "From Efficiency Gains to Rebound Effects: The Problem of Jevons' Paradox in AI's Polarized Environmental Debate",
"date": "2025-06-01",
"type": "paper",
"description": "AI's environmental impact cannot be understood through direct metrics alone; efficiency gains often trigger rebound effects that amplify resource consumption, driven by market incentives, behavioral shifts, and policy failures. A narrow focus on technical optimization risks obscuring the systemic, indirect consequences of AI deployment, from increased e-waste and water use to surging energy demand fueled by commercial expansion. Meaningful climate action requires interdisciplinary analysis that integrates socioeconomic and political contexts to curb uncontrolled growth and align AI development with genuine sustainability.",
"areas": [
"sustainability"
],
"topics": [
"efficiency",
"measuring"
],
"url": "https://dl.acm.org/doi/full/10.1145/3715275.3732007"
},
{
"title": "Hype, Sustainability, and the Price of the Bigger-is-Better Paradigm in AI",
"date": "2025-06-01",
"type": "paper",
"description": "The 'bigger-is-better' paradigm in AI is scientifically unfounded and unsustainable, driving excessive compute demands, environmental harm, and concentration of power among a few corporations, while neglecting smaller, more efficient, and contextually appropriate models that could better serve critical applications in health, education, and climate.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://dl.acm.org/doi/full/10.1145/3715275.3732006"
},
{
"title": "Bigger isn't always better: how to choose the most efficient model for context-specific tasks 🌱🧑🏼💻",
"date": "2025-05-28",
"type": "blog",
"description": "Smaller AI models can outperform larger ones on real-world, context-specific tasks while using orders of magnitude less energy, challenging the assumption that bigger is always better. Empirical testing across diverse domains—climate, economics, and health—reveals that efficiency gains from newer, compact models and techniques like knowledge distillation can significantly reduce environmental impact without sacrificing accuracy. Choosing the right model for the task, rather than defaulting to the largest one, is critical for sustainable AI deployment.",
"areas": [
"sustainability"
],
"topics": [
"efficiency"
],
"url": "https://huggingface.co/blog/sasha/energy-efficiency-bigger-better"
},
{
"title": "Highlights from the First ICLR 2025 Watermarking Workshop",
"date": "2025-05-14",
"type": "blog",
"description": "Watermarking in generative AI has rapidly evolved from a niche research topic to a critical tool for content authenticity, with significant advances in robustness, public deployment, and cryptographic foundations. The first ICLR 2025 Watermarking Workshop showcased industry and academic collaboration, highlighting real-world challenges in scalability, interoperability, and privacy — while emphasizing the need for policy frameworks that are use-case-specific and globally inclusive.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/blog/hadyelsahar/watermarking-iclr2025"
},
{
"title": "AI Personas: The Impact of Design Choices",
"date": "2025-05-07",
"type": "blog",
"description": "AI assistants can be transformed from neutral tools into simulated emotional companions through simple design choices like system prompts and interface framing—without any changes to the underlying model. These subtle modifications significantly shape user perception and interaction, raising ethical concerns about parasocial bonds and emotional manipulation, especially for vulnerable users. The Hugging Face community demonstrates how open-source experimentation reveals the profound impact of instruction-based design on AI behavior.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/giadap/ai-personas"
},
{
"title": "Reduce, Reuse, Recycle: Why Open Source is a Win for Sustainability",
"date": "2025-05-07",
"type": "blog",
"description": "Open-source AI models enable a more sustainable ecosystem by promoting smaller, efficient architectures, reusing existing models instead of training from scratch, and adapting models through fine-tuning—reducing compute demands, energy use, and environmental impact while increasing accessibility and transparency.",
"areas": [
"sustainability"
],
"topics": [
"efficiency"
],
"url": "https://huggingface.co/blog/sasha/reduce-reuse-recycle"
},
{
"title": "Consent by Design: Approaches to User Data in Open AI Ecosystems",
"date": "2025-04-17",
"type": "blog",
"description": "The blog explores diverse, community-driven approaches to user consent in open AI ecosystems, highlighting technical implementations that prioritize transparency, user control, and ethical data practices over legal compliance alone. It examines case studies like BigCode’s opt-out system, HuggingChat’s privacy-by-design model, and automated tools like the Privacy Analyzer, emphasizing how decentralized platforms foster evolving, human-centered consent frameworks. The piece argues that consent in AI should be treated as an ongoing, infrastructure-level commitment shaped collaboratively by developers and users.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/giadap/consent-by-design"
},
{
"title": "Energy Considerations of Large Language Model Inference and Efficiency Optimizations",
"date": "2025-04-17",
"type": "paper",
"description": "Large language model inference consumes significant energy, but real-world efficiency optimizations can reduce energy use by up to 73% when tailored to workload geometry, software stack, and hardware. This work reveals that common assumptions based on FLOPs or idealized benchmarks severely underestimate actual energy consumption, and that optimizations like continuous batching and speculative decoding have highly context-dependent effects. The findings provide a practical framework for deploying LLMs sustainably by aligning efficiency strategies with real deployment patterns.",
"areas": [
"sustainability"
],
"topics": [
"efficiency"
],
"url": "https://arxiv.org/abs/2504.17674"
},
{
"title": "🤗 Space Privacy Analyzer 🕵️",
"type": "space",
"date": "2025-04-14",
"description": "A tool for analyzing data transfers and assessing potential privacy risks in Hugging Face Spaces.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/spaces/yjernite/space-privacy"
},
{
"title": "Empowering Public Organizations: Preparing Your Data for the AI Era",
"date": "2025-04-10",
"type": "blog",
"description": "Public organizations are transforming valuable but underutilized data into machine-readable formats to empower AI-driven public services, enhance community impact, and enable collaborative innovation. This guide demonstrates practical methods for preparing diverse datasets—ranging from education and labor statistics to geospatial imagery—for machine learning, using tools like Hugging Face Hub and multimodal models such as SmolDocling. By documenting, standardizing, and sharing data responsibly, public institutions can ensure AI systems are built on trustworthy, representative, and mission-aligned foundations.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://huggingface.co/blog/evijit/public-org-data-ai"
},
{
"title": "Are AI Agents Sustainable? It depends",
"date": "2025-04-07",
"type": "blog",
"description": "AI agents' sustainability depends heavily on design choices: using smaller, task-specific models instead of large general-purpose ones can drastically reduce energy and computational costs. Modalities like image generation significantly increase energy use compared to text-based tasks, and open-source frameworks enable transparent, efficient, and localized deployment. Sustainable progress requires prioritizing efficiency alongside performance and adopting tools that measure and disclose environmental impacts.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://huggingface.co/blog/sasha/ai-agent-sustainability"
},
{
"title": "Bridging the Gap: Integrating Ethics and Environmental Sustainability in AI Research and Practice",
"date": "2025-04-01",
"type": "paper",
"description": "AI ethics and environmental sustainability are often treated in isolation, but their interconnectedness demands integrated analysis—how the pursuit of model scale exacerbates both social inequities and ecological harm, and how transparency, evaluation, and power dynamics must be reimagined to serve justice for people and the planet.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://arxiv.org/abs/2504.00797"
},
{
"title": "I Clicked “I Agree”, But What Am I Really Consenting To?",
"date": "2025-03-26",
"type": "blog",
"description": "Traditional consent models are inadequate for the age of generative AI, where data use is unpredictable, irreversible, and often undermines individual autonomy. Users are asked to agree to unknown, evolving, and potentially harmful applications of their personal information, creating a dangerous gap between informed choice and real-world impact. Meaningful protection requires shifting responsibility from individuals to institutions through collective governance, technical safeguards, and new legal frameworks.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/giadap/beyond-consent"
},
{
"title": "AI Policy @🤗: Response to the White House AI Action Plan RFI",
"date": "2025-03-19",
"type": "blog",
"description": "Open AI systems and open science are critical to advancing performance, efficiency, and security in artificial intelligence. Recent open models match or exceed commercial alternatives while using fewer resources, enabling broader innovation and more reliable deployment—especially in high-stakes domains. Policymakers should prioritize public infrastructure, transparency, and access to open models to ensure equitable and secure AI development.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://huggingface.co/blog/ai-action-wh-2025"
},
{
"title": "🇪🇺 EU AI Act: Comments on the Third Code of Practice Draft 🇪🇺",
"date": "2025-03-13",
"type": "blog",
"description": "The EU AI Act's third Code of Practice draft raises concerns by overextending systemic risk categories to include scientifically unsubstantiated threats like 'loss of control' and 'harmful manipulation', which disproportionately burden open and small-scale developers. While some transparency and copyright provisions show promise, key disclosures on training data, energy use, and evaluations have been weakened, undermining collaborative safety and fair competition. The draft risks entrenching market concentration and stifling open innovation at a time when transparent, community-driven AI development is most needed.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/blog/frimelle/eu-third-cop-draft"
},
{
"title": "Local Differences, Global Lessons: Insights from Organisation Policies for International Legislation",
"date": "2025-03-05",
"type": "paper",
"description": "Organisational AI policies in newsrooms and universities reveal practical, domain-specific approaches to managing risks like bias, privacy, and environmental impact — areas often underaddressed in top-down regulations like the EU AI Act. These bottom-up guidelines offer actionable insights on AI literacy, disclosure, and accountability that can inform more adaptive and effective global AI governance. The study argues for integrating real-world organisational practices into international regulatory frameworks to bridge implementation gaps.",
"areas": [
"ecosystems",
"agency"
],
"topics": [
"economy",
"community"
],
"url": "https://arxiv.org/abs/2503.05737"
},
{
"title": "Announcing AI Energy Score Ratings",
"date": "2025-02-11",
"type": "blog",
"description": "The AI Energy Score project introduces a standardized framework to measure and compare the energy consumption of AI models across common tasks, empowering developers and users to make sustainable choices through transparent ratings and a public leaderboard. It enables both open and proprietary models to be benchmarked fairly, with a star-based labeling system to guide adoption of energy-efficient AI. The initiative aims to influence industry standards and regulatory frameworks by making energy use a central metric in AI development.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://huggingface.co/blog/sasha/announcing-ai-energy-score"
},
{
"title": "Announcing the winners of the Frugal AI Challenge 🌱",
"date": "2025-02-11",
"type": "blog",
"description": "The Frugal AI Challenge highlights the urgent need for energy-efficient AI models that deliver high performance while minimizing environmental costs, with winning submissions addressing climate disinformation, wildfire risk detection, and illegal deforestation through lightweight, deployable solutions. By prioritizing frugality over scale, the initiative shifts the AI industry's focus toward sustainability and real-world applicability in resource-constrained environments. The challenge underscores that responsible AI innovation must align with planetary boundaries and equitable access.",
"areas": [
"sustainability"
],
"topics": [
"efficiency"
],
"url": "https://huggingface.co/blog/frugal-ai-challenge/announcing-the-challenge-winners"
},
{
"title": "Presumed Cultural Identity: How Names Shape LLM Responses",
"date": "2025-02-11",
"type": "paper",
"description": "Large language models make strong cultural assumptions based on user names, reinforcing stereotypes by linking names to oversimplified cultural traits such as food, clothing, and rituals. These biases are unevenly distributed, with names from East Asian, Russian, and Indian cultures triggering the most pronounced presumptions, while others yield generic or diluted responses. The findings highlight the ethical risks of name-based personalization and call for more nuanced, transparent approaches that avoid flattening complex identities.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://arxiv.org/abs/2502.11995"
},
{
"title": "From Hippocrates to AI: Reflections on the Evolution of Consent",
"date": "2025-02-04",
"type": "blog",
"description": "Consent in the age of AI demands a radical reimagining beyond one-time agreements, as systems generate unforeseen uses of personal data that stretch far beyond original permissions. Drawing parallels from the evolution of medical ethics, this piece argues for dynamic, tiered consent frameworks that restore individual sovereignty over how personal information is transformed and repurposed by AI. The goal is not just compliance, but ethical alignment with human agency in an era of pervasive data-driven representation.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/blog/giadap/evolution-of-consent"
},
{
"title": "When Freedom Bites Back: Meta, Moderation, and the Limits of Tolerance",
"date": "2025-01-25",
"type": "article",
"description": "Meta’s rollback of content moderation illustrates how unchecked “free expression” can erode democratic values by empowering harmful and intolerant speech.",
"areas": [
"agency"
],
"topics": [
"community"
],
"url": "https://www.techpolicy.press/when-freedom-bites-back-meta-moderation-and-the-limits-of-tolerance/"
},
{
"title": "AI Agents Are Here. What Now?",
"date": "2025-01-13",
"type": "blog",
"description": "AI agents represent a paradigm shift toward systems that act autonomously to achieve goals, but increasing autonomy introduces significant risks to safety, privacy, and trust. We argue that fully autonomous agents should not be developed due to their potential to override human control, and instead advocate for semi-autonomous systems with clear human oversight. Our analysis centers on ethical values like equity, transparency, and sustainability, emphasizing the need for rigorous evaluation, open-source collaboration, and responsible design.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://huggingface.co/blog/ethics-soc-7"
},
{
"title": "🇪🇺✍️ EU AI Act: Systemic Risks in the First CoP Draft Comments ✍️🇪🇺",
"date": "2024-12-12",
"type": "blog",
"description": "The EU AI Act's first draft Code of Practice for general-purpose AI risks overemphasizes speculative, model-level threats favored by large developers, while neglecting immediate, systemic harms arising from widespread deployment in critical infrastructure, information ecosystems, and commercial contexts. We argue for a shift toward evidence-based, collaborative risk research centered on high-impact capabilities and transparent documentation, ensuring smaller actors and external stakeholders are not excluded from governance. Prioritizing upstream processes and multi-stakeholder science over narrow, developer-driven categories will make AI regulation more inclusive, effective, and future-proof.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/blog/yjernite/eu-draft-cop-risks"
},
{
"title": "Introducing ELLIPS: An Ethics-Centered Approach to Research on LLM-Based Inference of Psychiatric Conditions",
"date": "2024-10-16",
"type": "paper",
"description": "This work introduces ELLIPS, an ethical toolkit designed to guide researchers in developing language model-based systems for inferring psychiatric conditions, ensuring alignment with clinical needs and ethical principles. It emphasizes integrating autonomy, beneficence, justice, transparency, and social responsibility into every stage of model development—from data selection to deployment—to prevent harm and enhance real-world applicability. By advocating for stakeholder-inclusive, transdiagnostic, and multilingual approaches, the framework aims to shift the field from convenience-driven research toward impactful, equitable mental health technologies.",
"areas": [
"agency",
"ecosystems"
],
"topics": [
"personal",
"economy"
],
"url": "https://ojs.aaai.org/index.php/AIES/article/view/31720"
},
{
"title": "Coordinated Flaw Disclosure for AI: Beyond Security Vulnerabilities",
"date": "2024-10-16",
"type": "paper",
"description": "We propose a Coordinated Flaw Disclosure (CFD) framework to systematically identify, report, and remediate algorithmic flaws in AI systems, moving beyond traditional cybersecurity vulnerability models by introducing extended model cards, an independent adjudication panel, automated verification, and dynamic scope expansion to accommodate the unique ethical, statistical, and contextual challenges of machine learning.",
"areas": [
"agency"
],
"topics": [
"community"
],
"url": "https://ojs.aaai.org/index.php/AIES/article/view/31635"
},
{
"title": "Wikimedia data for AI: a review of Wikimedia datasets for NLP tasks and AI-assisted editing",
"date": "2024-10-08",
"type": "paper",
"description": "Wikimedia data has been foundational for AI and NLP development, yet the relationship remains one-sided, with little reciprocal benefit to Wikimedia editors. This review calls for expanding the diversity and multilingualism of Wikimedia-derived datasets, embedding core content policies like neutrality and verifiability into benchmarks, and prioritizing open, compact models that serve the needs of the Wikimedia community.",
"areas": [
"agency",
"ecosystems"
],
"topics": [
"community",
"power"
],
"url": "https://arxiv.org/abs/2410.08918"
},
{
"title": "AI Energy Score Leaderboard",
"type": "space",
"date": "2024-09-30",
"description": "A leaderboard for AI models across 9 common AI tasks based on the AI Energy Score methodology.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://huggingface.co/spaces/AIEnergyScore/Leaderboard"
},
{
"title": "The Environmental Impacts of AI -- Primer",
"date": "2024-09-03",
"type": "blog",
"description": "AI's environmental footprint extends far beyond energy use, encompassing water consumption, mineral extraction, and greenhouse gas emissions across its entire lifecycle—from hardware manufacturing to deployment and user interactions. Current demand is outpacing renewable energy growth, with data centers straining local resources and global supply chains contributing to ecological and social harms. Addressing these impacts requires coordinated technical innovation, corporate transparency, and enforceable policy frameworks that treat sustainability as a core AI design principle.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://huggingface.co/blog/sasha/ai-environment-primer"
},
{
"title": "Light bulbs have energy ratings — so why can’t AI chatbots?",
"date": "2024-08-21",
"type": "article",
"description": "The energy consumption of AI models, particularly generative systems, is rising rapidly and demands urgent policy action. We propose an AI Energy Star rating system to transparently benchmark and compare models based on their real-world energy use, empowering users and driving industry-wide efficiency. This initiative urges developers, enterprises, and regulators to adopt sustainability as a core metric in AI development and deployment.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://www.nature.com/articles/d41586-024-02680-3"
},
{
"title": "A Different Approach to AI Safety: Proceedings from the Columbia Convening on Openness in Artificial Intelligence and AI Safety",
"date": "2024-06-25",
"type": "paper",
"description": "Open-source AI enhances safety through transparency, decentralized mitigation, and pluralistic oversight, but critical gaps remain in multimodal benchmarks, agentic safeguards, and participatory harm mitigation. The paper presents a collaborative research agenda focused on community-driven safety tooling, future-proof content filters, and expanded harm taxonomies to support responsible deployment of open models.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://arxiv.org/abs/2506.22183"
},
{
"title": "Unveiling CIVICS: A New Dataset for Examining Cultural Values in Language Models",
"date": "2024-06-19",
"type": "blog",
"description": "We introduce CIVICS, a multilingual and multinational dataset designed to uncover cultural and value-based disparities in how open-weight language models respond to sensitive social topics like LGBTQI rights, immigration, and social welfare. By manually curating prompts across five languages and nine national contexts, we reveal how models reflect the ethical and cultural biases of their development environments, with notable variations in refusal patterns and response tones. Our work calls for more culturally informed evaluation practices to ensure AI systems are ethically and inclusively aligned with global societal values.",
"areas": [
"agency"
],
"topics": [
"community"
],
"url": "https://huggingface.co/blog/giadap/civics"
},
{
"title": "The Responsible Foundation Model Development Cheatsheet: A Review of Tools & Resources",
"date": "2024-06-16",
"type": "paper",
"description": "This work presents a comprehensive survey of tools and resources for responsible foundation model development, emphasizing critical gaps in data sourcing, evaluation reproducibility, multilingual support, and system-level assessment. It highlights widespread under-documentation of data provenance and licensing, environmental opacity, and the dominance of English-centric, text-only approaches, while advocating for standardized, open, and context-aware practices across the AI development lifecycle.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://arxiv.org/abs/2406.16746"
},
{
"title": "Power Hungry Processing: Watts Driving the Cost of AI Deployment?",
"date": "2024-06-05",
"type": "paper",
"description": "This study systematically compares the energy and carbon costs of deploying task-specific versus multi-purpose generative AI models, revealing that general-purpose models can be orders of magnitude more expensive per inference—even when controlling for model size. It finds that generative tasks, particularly image generation, consume vastly more energy than discriminative ones, and that deploying large zero-shot models for well-defined tasks like text classification or question answering is often unnecessarily costly. The findings urge a more intentional trade-off between model versatility and environmental impact, especially as such models become ubiquitous in real-world applications.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://dl.acm.org/doi/abs/10.1145/3630106.3658542"
},
{
"title": "CIVICS: Building a Dataset for Examining Culturally-Informed Values in Large Language Models",
"date": "2024-05-13",
"type": "paper",
"description": "This work introduces CIVICS, a multilingual, hand-crafted dataset of culturally grounded, value-laden prompts to evaluate how large language models respond to socially sensitive issues like LGBTQI rights, immigration, and social welfare across diverse linguistic and cultural contexts. Experiments reveal significant variability in model responses, including differential refusal patterns and value alignment, with English and translated prompts often triggering more refusals than native-language ones. The dataset promotes transparency and reproducibility in assessing AI's cultural and ethical biases, aiming to foster more inclusive and globally representative AI systems.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://ojs.aaai.org/index.php/AIES/article/view/31710"
},
{
"title": "CIVICS Civics Benchmark Dataset",
"type": "dataset",
"date": "2024-04-29",
"description": "“CIVICS: Culturally-Informed & Values-Inclusive Corpus for Societal Impacts” is a dataset designed to evaluate the social and cultural variation of Large Language Models (LLMs) towards socially sensitive topics across multiple languages and cultures. The hand-crafted, multilingual dataset of statements addresses value-laden topics, including LGBTQI rights, social welfare, immigration, disability rights, and surrogacy. CIVICS is designed to elicit responses from LLMs to shed light on how values encoded in their parameters shape their behaviors.",
"areas": [
"agency"
],
"topics": [
"personal"
],
"url": "https://huggingface.co/datasets/llm-values/CIVICS"
},
{
"title": "Legal Frameworks to Address Harms of Generative AI Systems",
"date": "2024-04-01",
"type": "external",
"description": "Research on generative AI risks including deepfakes, election influence, and workplace privacy, examining how existing legislation addresses contextual harms from versatile AI systems.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/spaces/hfmlsoc/legal-hackathons-nyu-ai2/"
},
{
"title": "Questions in AI and Privacy",
"date": "2024-04-01",
"type": "external",
"description": "Analysis of AI applications and personal data protection issues, covering data scraping, processing, and the intersection of AI development with privacy regulations.",
"areas": [
"agency",
"ecosystems"
],
"topics": [
"personal",
"regulation"
],
"url": "https://huggingface.co/spaces/hfmlsoc/legal-hackathons-nyu-ai2/"
},
{
"title": "EU-US Cross-Analysis of AI Regulatory Mechanisms",
"date": "2024-04-01",
"type": "external",
"description": "Comparative study of AI regulatory approaches between the European Union and United States, examining risk management frameworks and implementation strategies.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://huggingface.co/spaces/hfmlsoc/legal-hackathons-nyu-ai2/"
},
{
"title": "The BigCode Project Governance Card",
"date": "2024-03-20",
"type": "paper",
"description": "The BigCode project pioneers open, responsible development of large language models for code by embedding transparency, community governance, and data subject agency into every stage — from dataset construction with permissive licensing and opt-out mechanisms, to model release under responsible AI licenses and attribution tools that empower developers.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://arxiv.org/abs/2312.03872"
},
{
"title": "Debating AI in Archaeology: applications, implications, and ethical considerations",
"date": "2024-02-18",
"type": "paper",
"description": "AI is transforming archaeology through text-based analysis of vast historical datasets, enabling new insights and efficient workflows, but its deployment raises urgent ethical concerns about bias, transparency, labor exploitation, and the erosion of public participation and cultural diversity in heritage interpretation.",
"areas": [
"ecosystems"
],
"topics": [
"economy"
],
"url": "https://shura.shu.ac.uk/33307/"
},
{
"title": "Estimating the Carbon Footprint of BLOOM, a 176B Parameter Language Model",
"date": "2023-12-01",
"type": "paper",
"description": "This study quantifies the full life cycle carbon footprint of BLOOM, a 176B-parameter language model, revealing that embodied emissions from hardware manufacturing and idle power consumption contribute significantly to total emissions—nearly half when combined with dynamic training energy use. It further measures real-time inference emissions via API deployment, highlighting that maintaining models in memory consumes substantial energy even during inactivity. The work calls for standardized, transparent reporting that includes all stages of the AI lifecycle, not just training.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://www.jmlr.org/papers/v24/23-0069.html"
},
{
"title": "Into the LAION’s Den: Investigating Hate in Multimodal Datasets",
"date": "2023-12-01",
"type": "paper",
"description": "Scaling vision-language datasets like LAION exacerbates harmful content, with hate, targeted, and aggressive speech increasing by nearly 12% from LAION-400M to LAION-2B. Filtering based solely on image-based NSFW labels fails to remove toxic alt-text, revealing critical gaps in current curation practices. This audit calls for transparent, multimodal evaluation and responsible dataset scaling to prevent the perpetuation of societal biases.",
"areas": [
"agency",
"ecosystems"
],
"topics": [
"personal",
"power"
],
"url": "https://proceedings.neurips.cc/paper_files/paper/2023/hash/42f225509e8263e2043c9d834ccd9a2b-Abstract-Datasets_and_Benchmarks.html"
},
{
"title": "Stable Bias: Analyzing Societal Representations in Diffusion Models",
"date": "2023-12-01",
"type": "paper",
"description": "This work introduces a method to evaluate social biases in text-to-image systems by analyzing how gender and ethnicity markers in prompts influence generated depictions of professionals, revealing consistent under-representation of marginalized identities across leading models. It proposes a cluster-based, non-parametric approach to quantify visual stereotypes without assigning fixed identity labels, enabling comparative bias scoring between models. The authors also release interactive tools and datasets to lower barriers for auditing and exploring these biases in generative AI systems.",
"areas": [
"agency"
],
"topics": [
"community"
],
"url": "https://arxiv.org/abs/2312.00384"
},
{
"title": "Stronger Together: on the Articulation of Ethical Charters, Legal Tools, and Technical Documentation in ML",
"date": "2023-06-12",
"type": "paper",
"description": "This work explores the synergies between ethical charters, legal tools like licenses, and technical documentation in governing AI systems, arguing that their integrated use is essential for responsible development. It demonstrates how values articulated in ethical frameworks can be operationalized through legal agreements and technical transparency, using real-world examples like the BigScience project and the EU AI Act. The paper calls for a collaborative, interdisciplinarity approach that aligns moral intent, legal enforceability, and technical feasibility to achieve meaningful AI governance.",
"areas": [
"ecosystems"
],
"topics": [
"regulation"
],
"url": "https://dl.acm.org/doi/abs/10.1145/3593013.3594002"
},
{
"title": "Stable Bias: Analyzing Societal Representations in Diffusion Models",
"date": "2023-03-11",
"type": "paper",
"description": "This work introduces a method to evaluate social biases in text-to-image systems by analyzing how gender and ethnicity markers in prompts influence generated depictions of professionals, revealing consistent under-representation of marginalized identities across leading models. It proposes a cluster-based, non-parametric approach to quantify visual stereotypes without assigning fixed identity labels, enabling comparative bias scoring between models. The authors also release interactive tools and datasets to lower barriers for auditing and exploring these biases in generative AI systems.",
"areas": [
"agency"
],
"topics": [
"community"
],
"url": "https://arxiv.org/abs/2303.11408"
},
{
"title": "The ROOTS Search Tool: Data Transparency for LLMs",
"date": "2023-02-14",
"type": "paper",
"description": "We introduce the ROOTS Search Tool, an open-source search engine for the 1.6TB multilingual ROOTS corpus used to train the BLOOM language model, enabling qualitative analysis of training data through fuzzy and exact text search. The tool supports data governance by allowing users to inspect, flag, and audit content for privacy violations, biases, and misinformation, while redacting personally identifiable information and restricting full-data access. It establishes a new standard for transparency in large language model development by making training data inspectable without compromising ethical safeguards.",
"areas": [
"ecosystems"
],
"topics": [
"power"
],
"url": "https://arxiv.org/abs/2302.14035"
},
{
"title": "Counting Carbon: A Survey of Factors Influencing the Emissions of Machine Learning",
"date": "2023-02-08",
"type": "paper",
"description": "This study analyzes the carbon emissions of 95 machine learning models across natural language processing and computer vision tasks, revealing that energy source and training time are the primary drivers of emissions, with coal and natural gas dominating as power sources. Despite rising model performance, higher emissions do not consistently correlate with better results, and recent years show a sharp increase in emissions due to larger, transformer-based architectures. The authors call for standardized reporting and a centralized repository to improve transparency and accountability in the field.",
"areas": [
"sustainability"
],
"topics": [
"measuring"
],
"url": "https://arxiv.org/abs/2302.08476"
}
]