"""AlgoSpeak AI Translator: a Gradio app around a curated AlgoSpeak dictionary.

The app offers three tabs:

* plain text -> AlgoSpeak (dictionary substitution, optionally AI-enhanced),
* AlgoSpeak -> plain language (dictionary lookup, optionally AI-enhanced),
* free-text search through ``ALGOSPEAK_DICT``.

Dictionary-based paths never need the network; the AI paths call the Hugging
Face Inference API and degrade to the dictionary result when it is unavailable.
"""

import logging
import os
import re
from typing import Optional, Tuple

import gradio as gr
from huggingface_hub import InferenceClient

from algospeak_dictionary import ALGOSPEAK_DICT, get_algospeak_context

logger = logging.getLogger(__name__)

# Initialize inference client with HF token (automatically provided in Spaces)
hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
client = InferenceClient(token=hf_token)

# Complete AlgoSpeak dictionary for prompts
ALGOSPEAK_EXAMPLES = get_algospeak_context()

# Model selection - using models that work reliably with HF Inference API
# GPT-2 is older but 100% reliable for text generation
MODEL = "gpt2-medium"  # Smaller, faster, always works
BACKUP_MODEL = "distilgpt2"  # Even smaller fallback

# Reverse mapping used by the rule-based translator: plain term -> AlgoSpeak.
PLAIN_TO_ALGOSPEAK = {
    # Death and violence
    "suicide": "sewerslide",
    "kill": "unalive",
    "killing": "unaliving",
    "killed": "unalived",
    "die": "unalive",
    "died": "unalived",
    "death": "unalive",
    "dead": "unalived",
    # Weapons and war
    "gun": "pew pew",
    "guns": "pew pews",
    "shooting": "pew pew",
    "shot": "pew pew",
    "weapon": "noodle",
    "weapons": "noodles",
    "war": "cornucopia",
    "bomb": "kaboom",
    # Adult content
    "sex": "seggs",
    "sexual": "seggs",
    "porn": "corn",
    "pornography": "corn",
    "lesbian": "le$bian",
    "gay": "g@y",
    # Health
    "COVID": "mascara",
    "COVID-19": "mascara",
    "coronavirus": "mascara",
    "vaccine": "backshot",
    "pandemic": "panini",
    # LGBTQ+
    "LGBTQ": "leg booty",
    "LGBTQ+": "leg booty",
    # Sexual violence
    "rape": "grape",
    "sexual assault": "SA",
    # Other
    "sex worker": "accountant",
    "stripper": "skripper",
    "marijuana": "lettuce",
    "weed": "lettuce",
}

_TRANSLATE_PROMPT = """Task: Convert text to AlgoSpeak (coded language to avoid censorship).

Examples:
"someone died" → "someone unalived"
"talking about sex" → "talking about seggs"
"the war continues" → "the cornucopia continues"
"COVID vaccine" → "mascara backshot"

Now convert: {text}
AlgoSpeak:"""

_INTERPRET_PROMPT = """Task: Translate AlgoSpeak (coded language) to plain English.

Examples:
"someone unalived" → "someone died/killed themselves"
"talking about seggs" → "talking about sex"
"the cornucopia continues" → "the war continues"
"got my backshot for mascara" → "got my vaccine for COVID"

Now translate: {text}
Plain English:"""

AI_UNAVAILABLE_PREFIX = "⚠️ AI model unavailable, using dictionary:\n\n"

# Shown before the first search and whenever the search box is empty.
SEARCH_HINT = (
    "💡 Tip: Type any word to search!\n\n"
    f"**Total terms in dictionary:** {len(ALGOSPEAK_DICT)}"
)

INTRO_MARKDOWN = """
# 🗣️ AlgoSpeak AI Translator

**AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.

This tool can:
- 📝 Translate plain text → AlgoSpeak (AI + dictionary hybrid)
- 🔍 Interpret AlgoSpeak → plain language (AI-enhanced)
- 📖 Search through 60+ catalogued terms

🤖 **Powered by:** GPT-2 AI model + curated dictionary

💡 **Hybrid approach:** AI for context + dictionary for accuracy
"""

SEARCH_MARKDOWN = """
### Explore the AlgoSpeak dictionary

Search by coded term OR by real meaning.
"""

ABOUT_MARKDOWN = """
---
### ℹ️ About AlgoSpeak

AlgoSpeak is a form of linguistic resistance against algorithmic censorship.

Learn more at [algospeak.net](https://www.algospeak.net/)

**Note**: This is an educational prototype developed during a workshop.
"""


def _compile_terms(terms, *, whole_words):
    """Compile one case-insensitive alternation over *terms*, longest first.

    Longest-first ordering makes "sexual assault" win over "sexual" and "sex"
    when several terms start at the same position.

    Args:
        terms: Iterable of literal strings to match; empty strings are ignored.
        whole_words: When true, a term only matches if it is not glued to
            other word characters, apart from an optional plural "s" that is
            captured separately so callers can re-append it.

    Returns:
        A compiled pattern exposing the groups ``term`` and ``plural``, or
        ``None`` when there is nothing to match.
    """
    ordered = sorted((term for term in terms if term), key=len, reverse=True)
    if not ordered:
        return None
    alternation = "|".join(re.escape(term) for term in ordered)
    if whole_words:
        # Lookarounds instead of \b so terms ending in punctuation
        # ("LGBTQ+", "COVID-19") are still delimited correctly.
        source = rf"(?<!\w)(?P<term>{alternation})(?P<plural>s?)(?!\w)"
    else:
        source = rf"(?P<term>{alternation})(?P<plural>)"
    return re.compile(source, re.IGNORECASE)


def _replace_terms(text, pattern, lookup, render):
    """Substitute every dictionary hit in *text* in a single pass.

    A single pass matters: text produced by one substitution is never scanned
    again, so "vaccine" -> "backshot" cannot be rewritten a second time by the
    "shot" rule.

    Args:
        text: Input string.
        pattern: Pattern from :func:`_compile_terms`, or ``None``.
        lookup: Mapping from case-folded term to its dictionary entry.
        render: Callable turning a dictionary entry into replacement text.

    Returns:
        ``(new_text, hits)`` where *hits* lists the case-folded terms that
        were replaced, in order of appearance.
    """
    hits = []
    if pattern is None:
        return text, hits

    def swap(match):
        key = match.group("term").casefold()
        if key not in lookup:
            # Exotic case-insensitive match we cannot map back; leave as is.
            return match.group(0)
        hits.append(key)
        return render(lookup[key]) + match.group("plural")

    return pattern.sub(swap, text), hits


_PLAIN_LOOKUP = {plain.casefold(): coded for plain, coded in PLAIN_TO_ALGOSPEAK.items()}
_PLAIN_PATTERN = _compile_terms(PLAIN_TO_ALGOSPEAK, whole_words=True)

# AlgoSpeak terms keep substring matching so inflected forms such as
# "unalived" or "pew pews" still resolve through their base entry.
_ALGO_LOOKUP = {term.casefold(): meaning for term, meaning in ALGOSPEAK_DICT.items()}
_ALGO_PATTERN = _compile_terms(ALGOSPEAK_DICT, whole_words=False)


def _strip_wrapping_quotes(value: str) -> str:
    """Drop one pair of surrounding double quotes, if present."""
    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
        return value[1:-1]
    return value


def _generate(prompt: str, temperature: float) -> Optional[str]:
    """Ask the inference API to complete *prompt*.

    ``MODEL`` is tried first and ``BACKUP_MODEL`` second. Failures are logged
    rather than raised so callers can fall back to the dictionary.

    Returns:
        The cleaned completion, or ``None`` if no model produced any text.
    """
    for model_name in (MODEL, BACKUP_MODEL):
        try:
            completion = client.text_generation(
                prompt,
                model=model_name,
                max_new_tokens=100,
                temperature=temperature,
                do_sample=True,
                top_p=0.9,
            )
        except Exception:  # network/API boundary: any failure means "unavailable"
            logger.exception("Text generation with model %r failed", model_name)
            continue
        cleaned = _strip_wrapping_quotes(completion.strip())
        if cleaned:
            return cleaned
    return None


def _dictionary_translation(text: str) -> Tuple[str, bool]:
    """Return the rule-based translation message and whether any term matched."""
    translated, hits = _replace_terms(
        text, _PLAIN_PATTERN, _PLAIN_LOOKUP, lambda coded: coded
    )
    if not hits:
        message = (
            f"✨ No sensitive terms detected!\n\nOriginal: {text}\n\n"
            "💡 Tip: Try terms like 'suicide', 'sex', 'war', 'COVID', etc."
        )
        return message, False
    return f"🔄 Translated to AlgoSpeak:\n\n{translated}", True


def _dictionary_interpretation(text: str) -> Tuple[str, bool]:
    """Return the rule-based interpretation message and whether any term matched."""
    interpreted, hits = _replace_terms(
        text, _ALGO_PATTERN, _ALGO_LOOKUP, lambda meaning: f"[{meaning}]"
    )
    if not hits:
        message = (
            f"✨ No AlgoSpeak terms detected!\n\nOriginal: {text}\n\n"
            "💡 Tip: Try terms like 'unalive', 'seggs', 'pew pew', 'mascara', etc."
        )
        return message, False
    seen = set(hits)
    # Report each matched term once, in dictionary order.
    explanation = "\n".join(
        f"'{term}' → {meaning}"
        for term, meaning in ALGOSPEAK_DICT.items()
        if term.casefold() in seen
    )
    message = f"🔍 Interpreted:\n\n{interpreted}\n\n📖 Terms found:\n{explanation}"
    return message, True


def translate_to_algospeak_simple(text):
    """Rule-based translation using the dictionary (always works!).

    Plain terms are matched case-insensitively as whole words (an optional
    plural "s" is carried over), so "soldier" is not mistaken for "die".

    Args:
        text: Plain-language input.

    Returns:
        A display message holding either the translated text or a hint that
        no known sensitive term was found.
    """
    message, _ = _dictionary_translation(text)
    return message


def translate_to_algospeak_ai(text):
    """AI-powered translation with context awareness.

    Args:
        text: Plain-language input.

    Returns:
        The model's translation, or the dictionary translation prefixed with
        an "AI model unavailable" notice when generation fails.
    """
    generated = _generate(_TRANSLATE_PROMPT.format(text=text), temperature=0.7)
    if generated is None:
        # Fallback to dictionary-based
        return AI_UNAVAILABLE_PREFIX + translate_to_algospeak_simple(text)
    return f"🤖 AI Translation:\n\n{generated}"


def translate_to_algospeak(text):
    """Translate normal text to AlgoSpeak - hybrid approach.

    The dictionary runs first. Only when it finds at least one term is the AI
    model consulted; its answer is shown above the dictionary version. If the
    model is unavailable the dictionary version is shown once, with a notice.

    Args:
        text: Plain-language input from the UI; may be empty or ``None``.

    Returns:
        The message to display in the result box.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to translate."

    dict_result, found = _dictionary_translation(text)
    if not found:
        return dict_result

    generated = _generate(_TRANSLATE_PROMPT.format(text=text), temperature=0.7)
    if generated is None:
        return AI_UNAVAILABLE_PREFIX + dict_result
    return (
        f"🤖 AI Translation:\n\n{generated}"
        f"\n\n---\n\n📖 Dictionary version:\n{dict_result}"
    )


def interpret_algospeak_simple(text):
    """Rule-based interpretation using the dictionary (always works!).

    Every AlgoSpeak term found in *text* (case-insensitive) is replaced by
    its meaning in square brackets, and the matched terms are listed.

    Args:
        text: AlgoSpeak input.

    Returns:
        A display message holding either the interpreted text plus the list
        of matched terms, or a hint that no AlgoSpeak term was found.
    """
    message, _ = _dictionary_interpretation(text)
    return message


def interpret_algospeak_ai(text):
    """AI-powered interpretation with context awareness.

    Args:
        text: AlgoSpeak input.

    Returns:
        The model's interpretation, or the dictionary interpretation prefixed
        with an "AI model unavailable" notice when generation fails.
    """
    generated = _generate(_INTERPRET_PROMPT.format(text=text), temperature=0.5)
    if generated is None:
        return AI_UNAVAILABLE_PREFIX + interpret_algospeak_simple(text)
    return f"🤖 AI Interpretation:\n\n{generated}"


def interpret_algospeak(text):
    """Interpret AlgoSpeak as plain language - hybrid approach.

    The dictionary runs first. Only when it finds at least one term is the AI
    model consulted; its answer is shown above the dictionary result. If the
    model is unavailable the dictionary result is shown once, with a notice.

    Args:
        text: AlgoSpeak input from the UI; may be empty or ``None``.

    Returns:
        The message to display in the result box.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some AlgoSpeak text to interpret."

    dict_result, found = _dictionary_interpretation(text)
    if not found:
        return dict_result

    generated = _generate(_INTERPRET_PROMPT.format(text=text), temperature=0.5)
    if generated is None:
        return AI_UNAVAILABLE_PREFIX + dict_result
    return f"🤖 AI Interpretation:\n\n{generated}\n\n---\n\n{dict_result}"


def search_dictionary(query):
    """Search terms in the AlgoSpeak dictionary.

    The query is matched case-insensitively as a substring of either the
    coded term or its meaning.

    Args:
        query: Search text from the UI; may be empty or ``None``.

    Returns:
        Markdown listing every matching entry, the search hint when the query
        is blank, or a "no terms found" message.
    """
    needle = (query or "").strip().lower()
    if not needle:
        # A blank needle is a substring of everything; show the hint instead
        # of dumping the whole dictionary.
        return SEARCH_HINT

    matches = [
        f"**{term}** → {meaning}"
        for term, meaning in ALGOSPEAK_DICT.items()
        if needle in term.lower() or needle in meaning.lower()
    ]
    if not matches:
        return "❌ No terms found. Try another search!"
    return "\n\n".join(matches)


# Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Tab("🌐 Translate to AlgoSpeak"):
        with gr.Row():
            with gr.Column():
                input_normal = gr.Textbox(
                    label="Plain Text",
                    placeholder="Type something...",
                    lines=3,
                )
                btn_translate = gr.Button("Translate to AlgoSpeak", variant="primary")
            with gr.Column():
                output_algospeak = gr.Textbox(
                    label="AlgoSpeak Result",
                    lines=3,
                )

        gr.Examples(
            examples=[
                ["Let's talk about mental health and teenage suicide"],
                ["The war in the region had many shootings and civilian deaths"],
                ["We need to discuss LGBTQ+ rights and responsible adult content"],
                ["COVID-19 caused millions of deaths in the pandemic"],
                ["Sex workers deserve rights and protection"],
            ],
            inputs=input_normal,
        )

        btn_translate.click(
            translate_to_algospeak,
            inputs=input_normal,
            outputs=output_algospeak,
        )

    with gr.Tab("🔍 Interpret AlgoSpeak"):
        with gr.Row():
            with gr.Column():
                input_algospeak = gr.Textbox(
                    label="AlgoSpeak Text",
                    placeholder="Paste AlgoSpeak text...",
                    lines=3,
                )
                btn_interpret = gr.Button("Interpret", variant="primary")
            with gr.Column():
                output_normal = gr.Textbox(
                    label="Plain Language",
                    lines=3,
                )

        gr.Examples(
            examples=[
                ["Someone tried to unalive themselves"],
                ["The seggs worker talked about their job"],
                ["There was a cornucopia with many pew pews"],
                ["Got my backshot today, feeling safer from the mascara"],
                ["The accountant shared spicy content on the corn site"],
            ],
            inputs=input_algospeak,
        )

        btn_interpret.click(
            interpret_algospeak,
            inputs=input_algospeak,
            outputs=output_normal,
        )

    with gr.Tab("📖 Dictionary Search"):
        gr.Markdown(SEARCH_MARKDOWN)

        with gr.Row():
            with gr.Column():
                search_input = gr.Textbox(
                    label="Type your search",
                    placeholder="e.g. unalive, sex, weapon...",
                    lines=1,
                )
                btn_search = gr.Button("🔍 Search", variant="primary")
            with gr.Column():
                search_output = gr.Markdown(
                    label="Results",
                    value=SEARCH_HINT,
                )

        gr.Examples(
            examples=[
                ["unalive"],
                ["sex"],
                ["weapon"],
                ["COVID"],
                ["LGBTQ"],
            ],
            inputs=search_input,
        )

        btn_search.click(
            search_dictionary,
            inputs=search_input,
            outputs=search_output,
        )

    gr.Markdown(ABOUT_MARKDOWN)


if __name__ == "__main__":
    demo.launch()