coldnasser committed on
Commit
3c89f2b
·
1 Parent(s): f0d03d0

new preprocessing function added

Browse files
Files changed (1) hide show
  1. app.py +15 -0
app.py CHANGED
@@ -30,6 +30,21 @@
30
  # gr.Interface(fn=predict, inputs="text", outputs="text", title="Mindscape").launch()
31
  import gradio as gr
32
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # Define the mapping from label to human-readable status
35
  label_mapping = {
 
30
  # gr.Interface(fn=predict, inputs="text", outputs="text", title="Mindscape").launch()
31
  import gradio as gr
32
  from transformers import pipeline
33
+ import re
34
# Patterns are compiled once at import time rather than on every call.
_MENTION_RE = re.compile(r'@\w+')
# Require an explicit scheme ("http://", "https://") or a "www." prefix at a
# word boundary, so ordinary words that merely contain "www" (e.g. "awwww")
# are not mistaken for URLs and truncated.
_URL_RE = re.compile(r'https?://\S+|\bwww\.\S+')
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text: str) -> str:
    """Normalize raw user text before it is passed on for classification.

    The steps are applied in this order:

    1. Remove ``@username`` style mentions.
    2. Remove URLs (``http://...``, ``https://...`` and ``www. ...``).
    3. Delete every character that is not an ASCII letter or whitespace.
       Characters are deleted, not replaced by a space, so ``"don't"``
       becomes ``"dont"``.
    4. Collapse runs of whitespace to a single space and strip both ends.

    Args:
        text: The raw input string.

    Returns:
        The cleaned string; may be empty if nothing but noise was supplied.
    """
    text = _MENTION_RE.sub('', text)
    text = _URL_RE.sub('', text)
    text = _NON_LETTER_RE.sub('', text)  # Keep only letters and spaces
    return _WHITESPACE_RE.sub(' ', text).strip()
47
+
48
 
49
  # Define the mapping from label to human-readable status
50
  label_mapping = {