Harika22 committed on
Commit
c32fa29
·
verified ·
1 Parent(s): 688f54c

Update pages/4_Simple_EDA.py

Browse files
Files changed (1) hide show
  1. pages/4_Simple_EDA.py +38 -0
pages/4_Simple_EDA.py CHANGED
@@ -99,3 +99,41 @@ st.markdown('''
99
  - Whether the collected text data contains any date/time
100
  ''')
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  - Whether the collected text data contains any date/time
100
  ''')
101
 
102
+ st.code('''
103
+ import pandas as pd
104
+ import numpy as np
105
+ import re
106
+ import emoji
107
+
108
def simple_eda(data, column):
    """Print which kinds of noise occur in a text column.

    Every non-missing value of ``data[column]`` is converted to ``str`` and
    checked for: mixed letter case, HTML-like tags, URLs, e-mail addresses,
    @mentions / #hashtags, emojis, digits, punctuation and
    ``d/m/yyyy``-shaped dates. One message is printed for each category
    that occurs in at least one row.

    Args:
        data: Object indexable by ``column`` that yields a pandas Series
            (for example a DataFrame).
        column: Label of the text column to inspect.

    Returns:
        None. Findings are reported with ``print``.
    """
    # Drop missing cells and coerce the rest to str so the string and regex
    # checks below never receive NaN or a number.
    texts = data[column].dropna().astype(str).tolist()

    def rows_where(predicate):
        # Number of rows for which predicate(text) is true.
        return sum(1 for text in texts if predicate(text))

    def rows_matching(pattern):
        # Number of rows in which the regular expression occurs anywhere.
        search = re.compile(pattern).search
        return rows_where(lambda text: search(text) is not None)

    # A row counts as mixed case only when it holds at least one lowercase
    # AND at least one uppercase character.
    lower_upper = rows_where(
        lambda text: any(ch.islower() for ch in text)
        and any(ch.isupper() for ch in text)
    )
    tags = rows_matching(r"<.*?>")
    # Accept both http:// and https:// links.
    urls = rows_matching(r"https?://\S+")
    mails = rows_matching(r"\S+@\S+")
    mentions = rows_matching(r"\B[@#]\S+")
    emojis = rows_where(lambda text: emoji.emoji_count(text) > 0)
    digit = rows_matching(r"\d")
    punc = rows_matching(r"""[!"#$%&'()*+,\-./:;<=>?@\[\]^_`{|}~]""")
    # Find dates anywhere inside the text; the lookarounds keep the match
    # from starting or ending in the middle of a longer run of digits.
    dates = rows_matching(r"(?<!\d)[0-9]{1,2}/[0-9]{1,2}/[0-9]{4}(?!\d)")

    findings = (
        (lower_upper, "text have combination"),
        (tags, "text have tags"),
        (urls, "text have urls"),
        (mails, "text have mails"),
        (mentions, "text have mentions"),
        (emojis, "text have emojis"),
        (digit, "text have digit"),
        (punc, "text have punctuations"),
        (dates, "text have dates"),
    )
    for count, message in findings:
        if count > 0:
            print(message)
138
+
139
+ ''')