Spaces:
Sleeping
Sleeping
Commit ·
3c89f2b
1
Parent(s): f0d03d0
new preprocessing function added
Browse files
app.py
CHANGED
|
@@ -30,6 +30,21 @@
|
|
| 30 |
# gr.Interface(fn=predict, inputs="text", outputs="text", title="Mindscape").launch()
|
| 31 |
import gradio as gr
|
| 32 |
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# Define the mapping from label to human-readable status
|
| 35 |
label_mapping = {
|
|
|
|
| 30 |
# gr.Interface(fn=predict, inputs="text", outputs="text", title="Mindscape").launch()
|
| 31 |
import gradio as gr
|
| 32 |
from transformers import pipeline
|
| 33 |
+
import re
|
| 34 |
+
def clean_text(text: str) -> str:
    """Normalize raw user text to lowercase/uppercase letters and single spaces.

    Steps, in order:
      1. Drop URLs (``http://``, ``https://`` or ``www.`` prefixed tokens).
      2. Drop ``@username`` mentions.
      3. Drop every character that is not an ASCII letter or whitespace.
      4. Collapse runs of whitespace and trim both ends.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string; empty if nothing but noise was present.
    """
    # URLs go first so that an '@' inside a URL path is removed together
    # with the URL. The pattern is anchored on a word boundary and requires
    # '://' or 'www.' so ordinary words such as "awwww" or "httpd" are not
    # mistaken for links; IGNORECASE covers auto-capitalized "Https://...".
    text = re.sub(r'\b(?:https?://|www\.)\S+', '', text, flags=re.IGNORECASE)

    # Remove mentions (@username)
    text = re.sub(r'@\w+', '', text)

    # Keep only ASCII letters and whitespace (drops digits and punctuation)
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Collapse whitespace left behind by the removals above
    text = re.sub(r'\s+', ' ', text).strip()

    return text
|
| 47 |
+
|
| 48 |
|
| 49 |
# Define the mapping from label to human-readable status
|
| 50 |
label_mapping = {
|