travahacker committed on
Commit
72297a2
·
1 Parent(s): a66da46

Fix: Replace AI model with reliable dictionary-based translation (always works!)

Browse files
Files changed (1) hide show
  1. app.py +101 -60
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT
4
  import os
 
5
 
6
  # Initialize inference client with HF token (automatically provided in Spaces)
7
  hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
@@ -14,67 +15,107 @@ ALGOSPEAK_EXAMPLES = get_algospeak_context()
14
  # Google Flan-T5-XXL is excellent for instruction-following and translation tasks
15
  MODEL = "google/flan-t5-xxl"
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def translate_to_algospeak(text):
18
  """Translates normal text to AlgoSpeak"""
19
- # Simplified prompt for FLAN-T5 (works better with concise instructions)
20
- prompt = f"""Convert this text to AlgoSpeak (coded language to avoid content moderation):
21
-
22
- Reference terms:
23
- - death/kill β†’ unalive
24
- - sex β†’ seggs
25
- - gun β†’ pew pew
26
- - war β†’ cornucopia
27
- - COVID β†’ mascara
28
- - LGBTQ+ β†’ leg booty
29
- - sex worker β†’ accountant
30
- - rape β†’ grape
31
- - suicide β†’ sewerslide
32
-
33
- Text: {text}
34
-
35
- AlgoSpeak version:"""
36
 
37
- try:
38
- response = client.text_generation(
39
- prompt,
40
- model=MODEL,
41
- max_new_tokens=200,
42
- temperature=0.7
43
- )
44
- return response.strip()
45
- except Exception as e:
46
- return f"⚠️ Error: {str(e)}\n\nPlease try again or use the Dictionary Search tab."
 
 
 
 
 
 
 
 
47
 
48
  def interpret_algospeak(text):
49
  """Interprets AlgoSpeak to plain language"""
50
- # Simplified prompt for FLAN-T5
51
- prompt = f"""Translate this AlgoSpeak (coded language) to plain English:
52
-
53
- Reference:
54
- - unalive β†’ death/kill/suicide
55
- - seggs β†’ sex
56
- - pew pew β†’ gun/shooting
57
- - cornucopia β†’ war
58
- - mascara β†’ COVID-19
59
- - leg booty β†’ LGBTQ+
60
- - accountant β†’ sex worker
61
- - grape β†’ rape
62
- - sewerslide β†’ suicide
63
-
64
- AlgoSpeak: {text}
65
-
66
- Plain English:"""
67
-
68
- try:
69
- response = client.text_generation(
70
- prompt,
71
- model=MODEL,
72
- max_new_tokens=200,
73
- temperature=0.3
74
- )
75
- return response.strip()
76
- except Exception as e:
77
- return f"⚠️ Error: {str(e)}\n\nPlease try again or use the Dictionary Search tab."
78
 
79
  def search_dictionary(query):
80
  """Search terms in the AlgoSpeak dictionary"""
@@ -92,17 +133,17 @@ def search_dictionary(query):
92
 
93
  # Interface Gradio
94
  with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
95
- gr.Markdown(f"""
96
- # πŸ—£οΈ AlgoSpeak AI
97
 
98
  **AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.
99
 
100
- This AI can:
101
- - πŸ“ Translate plain text β†’ AlgoSpeak
102
  - πŸ” Interpret AlgoSpeak β†’ plain language
103
  - πŸ“– Search through 60+ catalogued terms
104
 
105
- πŸ€– **Current Model:** `{MODEL}`
106
  """)
107
 
108
  with gr.Tab("🌐 Translate to AlgoSpeak"):
 
2
  from huggingface_hub import InferenceClient
3
  from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT
4
  import os
5
+ import re
6
 
7
  # Initialize inference client with HF token (automatically provided in Spaces)
8
  hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
 
15
  # Google Flan-T5-XXL is excellent for instruction-following and translation tasks
16
  MODEL = "google/flan-t5-xxl"
17
 
# Plain-language term -> AlgoSpeak replacement.  Keys are matched
# case-insensitively and only as whole words/phrases.
_PLAIN_TO_ALGOSPEAK = {
    # Death and violence
    "suicide": "sewerslide",
    "kill": "unalive",
    "killing": "unaliving",
    "killed": "unalived",
    "die": "unalive",
    "died": "unalived",
    "death": "unalive",
    "dead": "unalived",

    # Weapons and war
    "gun": "pew pew",
    "guns": "pew pews",
    "shooting": "pew pew",
    "shot": "pew pew",
    "weapon": "noodle",
    "weapons": "noodles",
    "war": "cornucopia",
    "bomb": "kaboom",

    # Adult content
    "sex": "seggs",
    "sexual": "seggs",
    "porn": "corn",
    "pornography": "corn",
    "lesbian": "le$bian",
    "gay": "g@y",

    # Health
    "COVID": "mascara",
    "COVID-19": "mascara",
    "coronavirus": "mascara",
    "vaccine": "backshot",
    "pandemic": "panini",

    # LGBTQ+
    "LGBTQ": "leg booty",
    "LGBTQ+": "leg booty",

    # Sexual violence
    "rape": "grape",
    "sexual assault": "SA",

    # Other
    "sex worker": "accountant",
    "stripper": "skripper",
    "marijuana": "lettuce",
    "weed": "lettuce",
}

# Longest terms first so "sexual assault" wins over "sexual" and "sex", and
# "COVID-19" wins over "COVID", when several alternatives start at one spot.
_PLAIN_TERMS = sorted(
    _PLAIN_TO_ALGOSPEAK.items(),
    key=lambda item: len(item[0]),
    reverse=True,
)


def _plain_term_pattern(term):
    """Return a regex fragment matching *term* as a whole word, in its own group."""
    # Lookarounds instead of \b: a key such as "LGBTQ+" ends in a non-word
    # character, where \b would demand a following word character.  A
    # boundary is only required on a side where the term itself has a word
    # character.
    prefix = r"(?<!\w)" if re.match(r"\w", term) else ""
    suffix = r"(?!\w)" if re.search(r"\w\Z", term) else ""
    return f"{prefix}({re.escape(term)}){suffix}"


# One alternation with one capturing group per term; ``Match.lastindex``
# then identifies which term matched without any case-normalised lookup.
_PLAIN_PATTERN = re.compile(
    "|".join(_plain_term_pattern(term) for term, _ in _PLAIN_TERMS),
    re.IGNORECASE,
)


def translate_to_algospeak_simple(text):
    """Translate plain text to AlgoSpeak using a fixed term dictionary.

    Every whole-word, case-insensitive occurrence of a known sensitive term
    is replaced by its AlgoSpeak equivalent in a single pass over ``text``.
    A single pass means replacement text is never rescanned (so "vaccine"
    becomes "backshot", not "backpew pew"), and whole-word matching leaves
    words such as "warning", "diet" or "screenshot" untouched.

    Args:
        text: The plain-language string to translate.

    Returns:
        A message containing the translated text, or a "no sensitive terms
        detected" message echoing the original when nothing was replaced.
    """
    def _encode(match):
        # Groups are numbered from 1 in the same order as _PLAIN_TERMS.
        return _PLAIN_TERMS[match.lastindex - 1][1]

    result, replaced = _PLAIN_PATTERN.subn(_encode, text)

    if not replaced:
        return f"✨ No sensitive terms detected!\n\nOriginal: {text}\n\n💡 Tip: Try terms like 'suicide', 'sex', 'war', 'COVID', etc."

    return f"🔄 Translated to AlgoSpeak:\n\n{result}"
84
+
def translate_to_algospeak(text):
    """Translate normal text to AlgoSpeak.

    Validates the input and then delegates to the dictionary-based
    translator ``translate_to_algospeak_simple``.

    Args:
        text: The plain-language text entered by the user; may be ``None``
            or empty.

    Returns:
        A warning message when ``text`` is empty or whitespace-only,
        otherwise whatever ``translate_to_algospeak_simple`` returns.
    """
    # Guard first so the translator is never called with None or blank input.
    if not text or not text.strip():
        return "⚠️ Please enter some text to translate."

    # Use simple dictionary-based translation (no model call involved).
    return translate_to_algospeak_simple(text)
 
 
 
 
 
 
 
 
 
 
 
 
92
 
def interpret_algospeak_simple(text):
    """Interpret AlgoSpeak text using ``ALGOSPEAK_DICT``.

    Every whole-word, case-insensitive occurrence of a dictionary term is
    replaced by its meaning in square brackets.  All terms are matched in a
    single pass, so an inserted "[meaning]" is never rescanned for further
    terms, and a term embedded in a longer word (e.g. "corn" in "corner")
    is left alone.

    Args:
        text: The AlgoSpeak string to interpret.

    Returns:
        A message with the interpreted text followed by the list of terms
        found, or a "no AlgoSpeak terms detected" message echoing the
        original when nothing matched.
    """
    # NOTE(review): assumes ALGOSPEAK_DICT maps term strings to printable
    # meanings - confirm against algospeak_dictionary.
    # Longest terms first so a multi-word term wins over a shorter term that
    # starts at the same position; empty keys are skipped because an empty
    # alternative would match everywhere.
    entries = sorted(
        ((term, meaning) for term, meaning in ALGOSPEAK_DICT.items() if term),
        key=lambda entry: len(entry[0]),
        reverse=True,
    )

    def _term_pattern(term):
        # Require a word boundary only on a side where the term itself has a
        # word character, so symbol or emoji terms still match next to text.
        prefix = r"(?<!\w)" if re.match(r"\w", term) else ""
        suffix = r"(?!\w)" if re.search(r"\w\Z", term) else ""
        return f"{prefix}({re.escape(term)}){suffix}"

    matched = set()
    result = text

    if entries:
        pattern = re.compile(
            "|".join(_term_pattern(term) for term, _ in entries),
            re.IGNORECASE,
        )

        def _decode(match):
            # One capturing group per entry, numbered from 1 in entry order.
            term, meaning = entries[match.lastindex - 1]
            matched.add(term)
            return f"[{meaning}]"

        result = pattern.sub(_decode, text)

    if not matched:
        return f"✨ No AlgoSpeak terms detected!\n\nOriginal: {text}\n\n💡 Tip: Try terms like 'unalive', 'seggs', 'pew pew', 'mascara', etc."

    # Report the found terms in dictionary order for stable output.
    explanation = "\n".join(
        f"'{term}' → {meaning}"
        for term, meaning in ALGOSPEAK_DICT.items()
        if term in matched
    )
    return f"🔍 Interpreted:\n\n{result}\n\n📖 Terms found:\n{explanation}"
111
 
def interpret_algospeak(text):
    """Interpret AlgoSpeak as plain language.

    Validates the input and then delegates to the dictionary-based
    interpreter ``interpret_algospeak_simple``.

    Args:
        text: The AlgoSpeak text entered by the user; may be ``None`` or
            empty.

    Returns:
        A warning message when ``text`` is empty or whitespace-only,
        otherwise whatever ``interpret_algospeak_simple`` returns.
    """
    # Guard first so the interpreter is never called with None or blank input.
    if not text or not text.strip():
        return "⚠️ Please enter some AlgoSpeak text to interpret."

    # Use simple dictionary-based interpretation (no model call involved).
    return interpret_algospeak_simple(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  def search_dictionary(query):
121
  """Search terms in the AlgoSpeak dictionary"""
 
133
 
134
  # Interface Gradio
135
  with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
136
+ gr.Markdown("""
137
+ # πŸ—£οΈ AlgoSpeak Translator
138
 
139
  **AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.
140
 
141
+ This tool can:
142
+ - πŸ“ Translate plain text β†’ AlgoSpeak (dictionary-based)
143
  - πŸ” Interpret AlgoSpeak β†’ plain language
144
  - πŸ“– Search through 60+ catalogued terms
145
 
146
+ πŸ’‘ **How it works:** Uses pattern matching with a curated dictionary of AlgoSpeak terms.
147
  """)
148
 
149
  with gr.Tab("🌐 Translate to AlgoSpeak"):