Harika22 committed on
Commit
896d8db
·
verified ·
1 Parent(s): 1d0bfe1

Update pages/4_Simple_EDA.py

Browse files
Files changed (1) hide show
  1. pages/4_Simple_EDA.py +23 -20
pages/4_Simple_EDA.py CHANGED
@@ -77,27 +77,30 @@ st.markdown("""
77
  </style>
78
  """, unsafe_allow_html=True)
79
 
80
- st.header(":red[Simple EDA💬]")
81
- st.markdown('''
82
- - Simple EDA is a part of life cycle in NLP where after collecting the raw data we need to perform simple eda which tells the quallty of the data
83
- - Simpl EDA is not performed based on the probelm statement
84
- - It checks the exploration of the data
85
- ''')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- st.subheader(":violet[Major Simple EDA📃]")
88
- st.markdown('''
89
- - Whether all the alphabets are in
90
- - lower case
91
- - upper case
92
- - combination of lower and upper case
93
- - Whether the collected text data contains any html / url tags
94
- - Whether the collected text data contains any urls
95
- - Whether the collected text data contains any mentions / hashtags
96
- - Whether the collected text data contains any digits
97
- - Whether the collected text data contains any punctuations
98
- - Whether the collected text data contains any emojis
99
- - Whether the collected text data contains any data /time
100
- ''')
101
 
102
  st.code('''
103
  import pandas as pd
 
77
  </style>
78
  """, unsafe_allow_html=True)
79
 
80
+ st.header(":red[📊 Simple EDA 💬]")
81
+
82
+ # Introduction to Simple EDA
83
+ st.markdown("<div class='section'>", unsafe_allow_html=True)
84
+ st.markdown("<h2 class='title'>🔍 Understanding Simple EDA</h2>", unsafe_allow_html=True)
85
+ st.markdown("<p class='subtitle'>Evaluating raw text data quality before processing</p>", unsafe_allow_html=True)
86
+
87
+ st.info("📌 **Simple EDA is a crucial step in the NLP lifecycle:**\n\n✅ Ensures raw data quality\n\n✅ Not dependent on problem statement\n\n✅ Helps in better data exploration")
88
+
89
+ st.markdown("</div>", unsafe_allow_html=True)
90
+
91
+ st.subheader(":violet[📃 Major Simple EDA Steps]")
92
+
93
+ st.markdown("✅ **Check Text Case** – Identify if text is in **lowercase, uppercase, or mixed case**.")
94
+ st.markdown("✅ **Detect HTML & URL Tags** – Analyze if text contains unwanted elements.")
95
+ st.markdown("✅ **Identify URLs** – Ensure URLs are either preserved or removed based on problem statement.")
96
+ st.markdown("✅ **Detect Mentions & Hashtags** – Find occurrences of `@mentions` or `#hashtags`.")
97
+ st.markdown("✅ **Identify Numeric Data** – Detect if text includes **digits or numerical data**.")
98
+ st.markdown("✅ **Analyze Punctuation Usage** – Check whether punctuation marks affect text clarity.")
99
+ st.markdown("✅ **Detect Emojis** – Ensure **emoji-based sentiments** are not lost.")
100
+ st.markdown("✅ **Analyze Date/Time Formats** – Identify the presence of date/time-related text.")
101
+
102
+ st.success("🚀 Performing **Simple EDA** ensures structured and high-quality text data, leading to better NLP model performance!")
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  st.code('''
106
  import pandas as pd