Harika22 committed on
Commit
b8713da
·
verified ·
1 Parent(s): 419554f

Update pages/5_Pre-procesing_of_text.py

Browse files
Files changed (1) hide show
  1. pages/5_Pre-procesing_of_text.py +12 -7
pages/5_Pre-procesing_of_text.py CHANGED
@@ -59,16 +59,20 @@ st.header(":blue[Pre-processing of Text🗺️]")
59
  st.markdown(
60
  '''
61
  <div class='section'>
62
- We will convert raw data into pre-processed data in 3 ways
63
- - **Cleaning** ---> which is based on the problem statement
64
- - **Simple pr-processing**
65
- - **Advance pre-processing**
 
 
 
66
  </div>
67
  ''',
68
  unsafe_allow_html=True,
69
  )
70
  st.markdown('''
71
  - Take a raw text and convert every character and word into single case
 
72
  - either upper case
73
 
74
  - or lower case
@@ -91,7 +95,8 @@ st.markdown(
91
  '''
92
  <div class='section'>
93
  Converts raw data into pre-processed data
94
- - which has 2 benefits
 
95
 
96
  - Reduce the dimensionality ---> to increase the performance of ML
97
 
@@ -101,8 +106,8 @@ st.markdown(
101
  <li><b>Removing URL's / tags/mails/mentions</b> Converting or preserving information should be based on the problem statement</li>
102
  <li><b>Handling Emoji's</b> Emoji's data should be preserved</li>
103
  <li><b>Contractions and acronyms</b>Both the contractions and acronyms should be converted into general text</li>
104
- <li><b>Stop Words</b> Stop words make the grammar very clear
105
- <li><b>Stemming and Lemmatization</b>Both are purely based on problm statement and if problem statement wants grammatical concept don't perform stemming</li></li>
106
  </ul>
107
  </div>
108
  ''',
 
59
  st.markdown(
60
  '''
61
  <div class='section'>
62
+ We will convert raw data into pre-processed data in 3 ways:
63
+
64
+ - Cleaning ---> which is based on the problem statement
65
+
66
+ - Simple pre-processing
67
+
68
+ - Advance pre-processing
69
  </div>
70
  ''',
71
  unsafe_allow_html=True,
72
  )
73
  st.markdown('''
74
  - Take a raw text and convert every character and word into single case
75
+
76
  - either upper case
77
 
78
  - or lower case
 
95
  '''
96
  <div class='section'>
97
  Converts raw data into pre-processed data
98
+
99
+ - which has 2 benefits:
100
 
101
  - Reduce the dimensionality ---> to increase the performance of ML
102
 
 
106
  <li><b>Removing URL's / tags/mails/mentions</b> Converting or preserving information should be based on the problem statement</li>
107
  <li><b>Handling Emoji's</b> Emoji's data should be preserved</li>
108
  <li><b>Contractions and acronyms</b>Both the contractions and acronyms should be converted into general text</li>
109
+ <li><b>Stop Words</b> Stop words make the grammar very clear</li>
110
+ <li><b>Stemming and Lemmatization</b> Both are purely based on the problem statement; if the problem statement needs grammatical concepts, don't perform stemming</li>
111
  </ul>
112
  </div>
113
  ''',