travahacker
committed on
Commit
·
72297a2
1
Parent(s):
a66da46
Fix: Replace AI model with reliable dictionary-based translation (always works!)
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import gradio as gr
|
|
| 2 |
from huggingface_hub import InferenceClient
|
| 3 |
from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT
|
| 4 |
import os
|
|
|
|
| 5 |
|
| 6 |
# Initialize inference client with HF token (automatically provided in Spaces)
|
| 7 |
hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
|
|
@@ -14,67 +15,107 @@ ALGOSPEAK_EXAMPLES = get_algospeak_context()
|
|
| 14 |
# Google Flan-T5-XXL is excellent for instruction-following and translation tasks
|
| 15 |
MODEL = "google/flan-t5-xxl"
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def translate_to_algospeak(text):
|
| 18 |
"""Translates normal text to AlgoSpeak"""
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
- sex β seggs
|
| 25 |
-
- gun β pew pew
|
| 26 |
-
- war β cornucopia
|
| 27 |
-
- COVID β mascara
|
| 28 |
-
- LGBTQ+ β leg booty
|
| 29 |
-
- sex worker β accountant
|
| 30 |
-
- rape β grape
|
| 31 |
-
- suicide β sewerslide
|
| 32 |
-
|
| 33 |
-
Text: {text}
|
| 34 |
-
|
| 35 |
-
AlgoSpeak version:"""
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def interpret_algospeak(text):
|
| 49 |
"""Interprets AlgoSpeak to plain language"""
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
- seggs β sex
|
| 56 |
-
- pew pew β gun/shooting
|
| 57 |
-
- cornucopia β war
|
| 58 |
-
- mascara β COVID-19
|
| 59 |
-
- leg booty β LGBTQ+
|
| 60 |
-
- accountant β sex worker
|
| 61 |
-
- grape β rape
|
| 62 |
-
- sewerslide β suicide
|
| 63 |
-
|
| 64 |
-
AlgoSpeak: {text}
|
| 65 |
-
|
| 66 |
-
Plain English:"""
|
| 67 |
-
|
| 68 |
-
try:
|
| 69 |
-
response = client.text_generation(
|
| 70 |
-
prompt,
|
| 71 |
-
model=MODEL,
|
| 72 |
-
max_new_tokens=200,
|
| 73 |
-
temperature=0.3
|
| 74 |
-
)
|
| 75 |
-
return response.strip()
|
| 76 |
-
except Exception as e:
|
| 77 |
-
return f"β οΈ Error: {str(e)}\n\nPlease try again or use the Dictionary Search tab."
|
| 78 |
|
| 79 |
def search_dictionary(query):
|
| 80 |
"""Search terms in the AlgoSpeak dictionary"""
|
|
@@ -92,17 +133,17 @@ def search_dictionary(query):
|
|
| 92 |
|
| 93 |
# Interface Gradio
|
| 94 |
with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
|
| 95 |
-
gr.Markdown(
|
| 96 |
-
# π£οΈ AlgoSpeak
|
| 97 |
|
| 98 |
**AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.
|
| 99 |
|
| 100 |
-
This
|
| 101 |
-
- π Translate plain text β AlgoSpeak
|
| 102 |
- π Interpret AlgoSpeak β plain language
|
| 103 |
- π Search through 60+ catalogued terms
|
| 104 |
|
| 105 |
-
|
| 106 |
""")
|
| 107 |
|
| 108 |
with gr.Tab("π Translate to AlgoSpeak"):
|
|
|
|
| 2 |
from huggingface_hub import InferenceClient
|
| 3 |
from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT
|
| 4 |
import os
|
| 5 |
+
import re
|
| 6 |
|
| 7 |
# Initialize inference client with HF token (automatically provided in Spaces)
|
| 8 |
hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
|
|
|
|
| 15 |
# Google Flan-T5-XXL is excellent for instruction-following and translation tasks
|
| 16 |
MODEL = "google/flan-t5-xxl"
|
| 17 |
|
| 18 |
+
# Plain-language term -> AlgoSpeak substitute. Keys are matched
# case-insensitively, so their own casing is only cosmetic.
_ALGOSPEAK_REPLACEMENTS = {
    # Death and violence
    "suicide": "sewerslide",
    "kill": "unalive",
    "killing": "unaliving",
    "killed": "unalived",
    "die": "unalive",
    "died": "unalived",
    "death": "unalive",
    "dead": "unalived",

    # Weapons and war
    "gun": "pew pew",
    "guns": "pew pews",
    "shooting": "pew pew",
    "shot": "pew pew",
    "weapon": "noodle",
    "weapons": "noodles",
    "war": "cornucopia",
    "bomb": "kaboom",

    # Adult content
    "sex": "seggs",
    "sexual": "seggs",
    "porn": "corn",
    "pornography": "corn",
    "lesbian": "le$bian",
    "gay": "g@y",

    # Health
    "COVID": "mascara",
    "COVID-19": "mascara",
    "coronavirus": "mascara",
    "vaccine": "backshot",
    "pandemic": "panini",

    # LGBTQ+
    "LGBTQ": "leg booty",
    "LGBTQ+": "leg booty",

    # Sexual violence
    "rape": "grape",
    "sexual assault": "SA",

    # Other
    "sex worker": "accountant",
    "stripper": "skripper",
    "marijuana": "lettuce",
    "weed": "lettuce",
}

# Case-folded lookup used by the substitution callback.
_ALGOSPEAK_LOOKUP = {
    term.casefold(): coded for term, coded in _ALGOSPEAK_REPLACEMENTS.items()
}

# A single alternation over every term, longest first so that multi-word and
# suffixed entries ("sex worker", "COVID-19", "LGBTQ+") win over their prefixes.
# Lookarounds are used instead of \b because some terms end in a non-word
# character ("LGBTQ+"). The optional trailing "s" keeps simple plurals working
# ("wars" -> "cornucopias") without matching inside unrelated words ("warm").
_ALGOSPEAK_PATTERN = re.compile(
    r"(?<!\w)("
    + "|".join(
        re.escape(term)
        for term in sorted(_ALGOSPEAK_REPLACEMENTS, key=len, reverse=True)
    )
    + r")(s?)(?!\w)",
    re.IGNORECASE,
)


def _algospeak_substitute(match):
    """Return the AlgoSpeak replacement for one regex match."""
    coded = _ALGOSPEAK_LOOKUP.get(match.group(1).casefold())
    if coded is None:
        # IGNORECASE can match a few non-ASCII look-alikes (e.g. a dotless i)
        # that do not fold to a table key; leave such text untouched.
        return match.group(0)
    return coded + match.group(2)


def translate_to_algospeak_simple(text):
    """Translate plain text to AlgoSpeak with a fixed replacement table.

    The text is rewritten in a single regex pass, so an inserted replacement
    is never scanned again (previously "vaccine" became "backshot" and was
    then rewritten to "backpew pew" by the later "shot" rule). Terms only
    match as whole words, optionally followed by a plural "s"; matching is
    case-insensitive.

    Args:
        text: The plain-language input string.

    Returns:
        A user-facing message: either the translated text, or a notice that
        no sensitive terms were found (when the text is unchanged).
    """
    result = _ALGOSPEAK_PATTERN.sub(_algospeak_substitute, text)

    if result == text:
        return f"✨ No sensitive terms detected!\n\nOriginal: {text}\n\n💡 Tip: Try terms like 'suicide', 'sex', 'war', 'COVID', etc."

    return f"🔄 Translated to AlgoSpeak:\n\n{result}"
|
| 84 |
+
|
| 85 |
def translate_to_algospeak(text):
    """Translate normal text to AlgoSpeak.

    Args:
        text: The user's input; may be None, empty, or whitespace only.

    Returns:
        A warning message when there is nothing to translate, otherwise the
        result of ``translate_to_algospeak_simple(text)``.
    """
    # Reject None, "" and whitespace-only input before doing any work.
    if not text or not text.strip():
        return "⚠️ Please enter some text to translate."

    # Dictionary-based translation: no model call is made here.
    return translate_to_algospeak_simple(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
def interpret_algospeak_simple(text):
    """Interpret AlgoSpeak text using the entries of ``ALGOSPEAK_DICT``.

    Every dictionary term found in ``text`` (case-insensitive) is replaced by
    its meaning in square brackets. The replacement happens in one regex pass
    with the longest terms tried first, so an inserted meaning is never
    re-scanned for further terms and an overlapping shorter term cannot split
    a longer one. Meanings are inserted literally (a backslash in a meaning is
    not treated as a regex escape).

    NOTE(review): matching is substring-based, as before; terms can still
    match inside longer words. Confirm against the dictionary contents before
    tightening this to whole-word matching.

    Args:
        text: The AlgoSpeak input string.

    Returns:
        A user-facing message with the interpreted text and the list of terms
        found (in dictionary order), or a notice that no terms were detected.
    """
    no_terms_message = f"✨ No AlgoSpeak terms detected!\n\nOriginal: {text}\n\n💡 Tip: Try terms like 'unalive', 'seggs', 'pew pew', 'mascara', etc."

    # An empty key would match at every position, so ignore it.
    terms = [term for term in ALGOSPEAK_DICT if term]
    if not terms:
        return no_terms_message

    # Map the case-folded spelling back to the dictionary's own key.
    canonical = {term.casefold(): term for term in terms}
    matcher = re.compile(
        "|".join(re.escape(term) for term in sorted(terms, key=len, reverse=True)),
        re.IGNORECASE,
    )
    seen = set()

    def expand(match):
        term = canonical.get(match.group(0).casefold())
        if term is None:
            # IGNORECASE matched a look-alike that does not fold to a key.
            return match.group(0)
        seen.add(term)
        return f"[{ALGOSPEAK_DICT[term]}]"

    result = matcher.sub(expand, text)

    if not seen:
        return no_terms_message

    explanation = "\n".join(
        f"'{term}' → {meaning}"
        for term, meaning in ALGOSPEAK_DICT.items()
        if term in seen
    )
    return f"🔍 Interpreted:\n\n{result}\n\n📚 Terms found:\n{explanation}"
|
| 111 |
|
| 112 |
def interpret_algospeak(text):
    """Interpret AlgoSpeak as plain language.

    Args:
        text: The user's input; may be None, empty, or whitespace only.

    Returns:
        A warning message when there is nothing to interpret, otherwise the
        result of ``interpret_algospeak_simple(text)``.
    """
    # Reject None, "" and whitespace-only input before doing any work.
    if not text or not text.strip():
        return "⚠️ Please enter some AlgoSpeak text to interpret."

    # Dictionary-based interpretation: no model call is made here.
    return interpret_algospeak_simple(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
def search_dictionary(query):
|
| 121 |
"""Search terms in the AlgoSpeak dictionary"""
|
|
|
|
| 133 |
|
| 134 |
# Interface Gradio
|
| 135 |
with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
|
| 136 |
+
gr.Markdown("""
|
| 137 |
+
# π£οΈ AlgoSpeak Translator
|
| 138 |
|
| 139 |
**AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.
|
| 140 |
|
| 141 |
+
This tool can:
|
| 142 |
+
- π Translate plain text β AlgoSpeak (dictionary-based)
|
| 143 |
- π Interpret AlgoSpeak β plain language
|
| 144 |
- π Search through 60+ catalogued terms
|
| 145 |
|
| 146 |
+
π‘ **How it works:** Uses pattern matching with a curated dictionary of AlgoSpeak terms.
|
| 147 |
""")
|
| 148 |
|
| 149 |
with gr.Tab("π Translate to AlgoSpeak"):
|