LakshmiHarika committed on
Commit
90a4426
·
verified ·
1 Parent(s): 4306ac5

Update pages/Data Collection.py

Browse files
Files changed (1) hide show
  1. pages/Data Collection.py +172 -4
pages/Data Collection.py CHANGED
@@ -273,6 +273,9 @@ if st.session_state.current_page == "main":
273
  navigate_to("explore_html")
274
 
275
 
 
 
 
276
  # Page for Explore Excel
277
  if st.session_state.current_page == "explore_excel":
278
  # Main Heading
@@ -479,6 +482,10 @@ if st.session_state.current_page == "explore_excel":
479
  with col2:
480
  if st.button("⬅️ Back to Previous Page"):
481
  st.session_state.current_page = "main"
 
 
 
 
482
 
483
  elif st.session_state.current_page == "explore_images_video":
484
  st.markdown("""
@@ -1775,6 +1782,9 @@ elif st.session_state.current_page == "image_transformations":
1775
  cv2.destroyAllWindows()
1776
  """, language="python")
1777
 
 
 
 
1778
 
1779
 
1780
  elif st.session_state.current_page == "explore_audio":
@@ -1788,6 +1798,10 @@ elif st.session_state.current_page == "explore_audio":
1788
  navigate_to("main")
1789
 
1790
 
 
 
 
 
1791
  elif st.session_state.current_page == "explore_text":
1792
  st.markdown("""
1793
  <h3 style="color: #e25822;">Exploring Text</h3>
@@ -1798,15 +1812,160 @@ elif st.session_state.current_page == "explore_text":
1798
  if st.button("Go Back"):
1799
  navigate_to("main")
1800
 
 
 
 
 
 
1801
  elif st.session_state.current_page == "explore_csv":
1802
  st.markdown("""
1803
- <h3 style="color: #e25822;">Exploring CSV</h3>
1804
  """, unsafe_allow_html=True)
 
1805
  st.write("""
1806
- CSV is a simple text-based format where data fields are separated by commas.
 
 
 
 
 
1807
  """)
1808
- if st.button("Go Back"):
1809
- navigate_to("main")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1810
 
1811
  elif st.session_state.current_page == "explore_json":
1812
  st.markdown("""
@@ -1818,6 +1977,10 @@ elif st.session_state.current_page == "explore_json":
1818
  if st.button("Go Back"):
1819
  navigate_to("main")
1820
 
 
 
 
 
1821
  elif st.session_state.current_page == "explore_xml":
1822
  st.markdown("""
1823
  <h3 style="color: #e25822;">Exploring XML</h3>
@@ -1828,6 +1991,11 @@ elif st.session_state.current_page == "explore_xml":
1828
  if st.button("Go Back"):
1829
  navigate_to("main")
1830
 
 
 
 
 
 
1831
  elif st.session_state.current_page == "explore_html":
1832
  st.markdown("""
1833
  <h3 style="color: #e25822;">Exploring HTML</h3>
 
273
  navigate_to("explore_html")
274
 
275
 
276
+
277
+ #--------------------------------------------------------- Excel--------------------------------------------------------------------------------
278
+
279
  # Page for Explore Excel
280
  if st.session_state.current_page == "explore_excel":
281
  # Main Heading
 
482
  with col2:
483
  if st.button("⬅️ Back to Previous Page"):
484
  st.session_state.current_page = "main"
485
+
486
+
487
+ #--------------------------------------------------------- Images and Video --------------------------------------------------------------------------------
488
+
489
 
490
  elif st.session_state.current_page == "explore_images_video":
491
  st.markdown("""
 
1782
  cv2.destroyAllWindows()
1783
  """, language="python")
1784
 
1785
+
1786
+
1787
+ #--------------------------------------------------------- Audio--------------------------------------------------------------------------------
1788
 
1789
 
1790
  elif st.session_state.current_page == "explore_audio":
 
1798
  navigate_to("main")
1799
 
1800
 
1801
+ #--------------------------------------------------------- text--------------------------------------------------------------------------------
1802
+
1803
+
1804
+
1805
  elif st.session_state.current_page == "explore_text":
1806
  st.markdown("""
1807
  <h3 style="color: #e25822;">Exploring Text</h3>
 
1812
  if st.button("Go Back"):
1813
  navigate_to("main")
1814
 
1815
+
1816
+ #--------------------------------------------------------- CSV --------------------------------------------------------------------------------
1817
+
1818
+
1819
+
1820
  elif st.session_state.current_page == "explore_csv":
1821
  st.markdown("""
1822
+ <h3 style="color: #BB3385;">Comma-Separated Values (CSV)</h3>
1823
  """, unsafe_allow_html=True)
1824
+
1825
  st.write("""
1826
+ - **CSV (Comma-Separated Values)** is a simple file format used to store tabular data.
1827
+ - Each row in a CSV file corresponds to a row of data, with columns separated by a delimiter (a comma by default).
1828
+ - It is widely used for:
1829
+ - Data storage and exchange between applications.
1830
+ - Importing/exporting data in tools like Excel, databases, and programming languages.
1831
+ - CSV files are lightweight, easy to read, and supported by most data-handling tools.
1832
  """)
1833
+
1834
+ st.markdown("""
1835
+ <h3 style="color: #5b2c6f;">Reading CSV File</h3>
1836
+ """, unsafe_allow_html=True)
1837
+
1838
+ st.code("""
1839
+ # Read the CSV file
1840
+ data = pd.read_csv('path_to_file.csv')
1841
+ print(data.head()) # Displays the first 5 rows
1842
+ """, language="python")
1843
+
1844
+ st.markdown("""
1845
+ <h3 style="color: #5b2c6f;">Exporting CSV File</h3>
1846
+ """, unsafe_allow_html=True)
1847
+
1848
+ st.code("""
1849
+ import pandas as pd
1850
+
1851
+ # Sample DataFrame
1852
+ data = pd.DataFrame({
1853
+ 'sepal_length': [1.5, 1.4, 1.5],
1854
+ 'sepal_width': [2.5, 2.8, 2.5],
1855
+ 'petal_length': [2.3, 2.2, 2.3],
1856
+ 'petal_width': [1.5, 1.1, 1.5],
1857
+ 'species': ['setosa', 'versicolor', 'virginica']
1858
+ })
1859
+
1860
+ # Export the DataFrame to a CSV file
1861
+ data.to_csv('iris_dataset.csv', index=False)
1862
+ """, language="python")
1863
+
1864
+ # Issues Section
1865
+ st.markdown("""
1866
+ <h3 style="color: #5b2c6f;">Common Issues with CSV Files</h3>
1867
+ """, unsafe_allow_html=True)
1868
+ st.markdown("""
1869
+ <h3 style="color: #BB3385;">1. ParserError</h3>
1870
+ """, unsafe_allow_html=True)
1871
+
1872
+ st.write("""
1873
+ - This error occurs when there is a mismatch in the number of columns in some rows.
1874
+ - It is commonly caused when a CSV file is manually edited in a text editor, leading to structural inconsistencies.
1875
+
1876
+ - Use the `on_bad_lines='warn'` or `on_bad_lines='skip'` parameter in `pd.read_csv()` to handle problematic rows.
1877
+ - Clean the CSV file to ensure consistent formatting.
1878
+ """)
1879
+
1880
+ st.code("""
1881
+ # Reading the CSV file with bad lines handled
1882
+ df = pd.read_csv('sample.csv', on_bad_lines='warn')
1883
+ print(df)
1884
+ """, language="python")
1885
+
1886
+ st.markdown("""
1887
+ <h3 style="color: #BB3385;">2. Encoding Error</h3>
1888
+ """, unsafe_allow_html=True)
1889
+
1890
+ st.write("""
1891
+ - Encoding is the process of converting characters into bytes so that text can be stored or transmitted.
1892
+ - It specifies how characters are stored and interpreted in files or streams.
1893
+ - Common encoding formats include:
1894
+ - **UTF-8**: The most widely used encoding format, compatible with most languages.
1895
+ - **ISO-8859-1** (Latin-1): Often used for Western European languages.
1896
+ - **ASCII**: Represents basic English characters.
1897
+ """)
1898
+
1899
+ st.write("""
1900
+ - This error occurs when a CSV file was saved with an encoding different from the one used to read it (e.g., saved as `ISO-8859-1` but read as `UTF-8`).
1901
+ - It often results in unreadable characters or errors while loading the file in Python.
1902
+
1903
+ - Use the `encoding` parameter in `pd.read_csv()` to specify the correct encoding format.
1904
+ - Common encodings to try:
1905
+ - `encoding='utf-8'`
1906
+ - `encoding='latin1'`
1907
+ - `encoding='iso-8859-1'`
1908
+ """)
1909
+
1910
+ st.code("""
1911
+ df = pd.read_csv('sample.csv', encoding='utf-8') # Specify the encoding format
1912
+ print(df)
1913
+ """, language="python")
1914
+
1915
+ st.write("""
1916
+ To identify the correct encoding for a CSV file, we can iterate through all possible encodings and try to read the file.
1917
+ This approach helps when the encoding of the file is unknown.
1918
+ """)
1919
+
1920
+ st.code("""
1921
+ import encodings
1922
+
1923
+ # Get all possible encodings
1924
+ encoding_list = encodings.aliases.aliases.keys()
1925
+
1926
+ # Check which encoding works
1927
+ for encoding in encoding_list:
1928
+ try:
1929
+ # Attempt to read the file with the current encoding
1930
+ pd.read_csv(file_path, encoding=encoding)
1931
+ print(f"{encoding} is correct encoding")
1932
+ except UnicodeDecodeError:
1933
+ print(f"{encoding} is not correct encoding")
1934
+ """, language="python")
1935
+
1936
+
1937
+
1938
+ st.markdown("""
1939
+ <h3 style="color: #BB3385;">3. Out of Memory Issue</h3>
1940
+ """, unsafe_allow_html=True)
1941
+
1942
+ st.write("""
1943
+ - Out of Memory issues occur when the size of the CSV file is too large to fit into the available memory (RAM) of the system.
1944
+ - This typically happens when:
1945
+ - Files contain millions of rows or a large number of columns.
1946
+ - The system's RAM is insufficient to load the entire file at once.
1947
+
1948
+ - Read the CSV file in smaller chunks using the `chunksize` parameter in `pd.read_csv()`.
1949
+ - Process and save chunks incrementally to avoid memory overload.
1950
+ - Use efficient data types to reduce memory usage.
1951
+ """)
1952
+ st.code("""
1953
+ # Reading a large CSV file in chunks
1954
+ chunk_size = 100000 # Number of rows per chunk
1955
+ chunks = []
1956
+
1957
+ for chunk in pd.read_csv('large_file.csv', chunksize = chunk_size):
1958
+ chunks.append(chunk)
1959
+
1960
+ # Combine all chunks into a single DataFrame if needed
1961
+ df = pd.concat(chunks, axis=0)
1962
+ print(df)
1963
+ """, language="python")
1964
+
1965
+
1966
+ #--------------------------------------------------------- Json --------------------------------------------------------------------------------
1967
+
1968
+
1969
 
1970
  elif st.session_state.current_page == "explore_json":
1971
  st.markdown("""
 
1977
  if st.button("Go Back"):
1978
  navigate_to("main")
1979
 
1980
+
1981
+ #--------------------------------------------------------- XML -------------------------------------------------------------------------------
1982
+
1983
+
1984
  elif st.session_state.current_page == "explore_xml":
1985
  st.markdown("""
1986
  <h3 style="color: #e25822;">Exploring XML</h3>
 
1991
  if st.button("Go Back"):
1992
  navigate_to("main")
1993
 
1994
+
1995
+ #--------------------------------------------------------- HTML --------------------------------------------------------------------------------
1996
+
1997
+
1998
+
1999
  elif st.session_state.current_page == "explore_html":
2000
  st.markdown("""
2001
  <h3 style="color: #e25822;">Exploring HTML</h3>