Phani1008 committed on
Commit
b6d3da9
·
verified ·
1 Parent(s): b10d4b1

Update pages/Types of Data.py

Browse files
Files changed (1) hide show
  1. pages/Types of Data.py +385 -28
pages/Types of Data.py CHANGED
@@ -43,44 +43,401 @@ st.sidebar.subheader("🔍 Explore Data Types")
43
  # Dropdown to select data type to learn about
44
  data_type = st.sidebar.selectbox(
45
  "Select a Type of Data:",
46
- ["Structured Data", "Unstructured Data", "Semi-Structured Data", "Time-Series Data", "Spatial Data"]
47
  )
48
 
49
  # Display information based on selected data type
50
  if data_type == "Structured Data":
51
  st.subheader("📋 Structured Data")
52
- st.write(
53
- "Structured data is organized and stored in databases with clear formats, such as rows and columns."
54
- " Examples include Excel files, SQL databases, and CSV files."
55
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  elif data_type == "Unstructured Data":
58
  st.subheader("🗂️ Unstructured Data")
59
- st.write(
60
- "Unstructured data lacks a predefined format and cannot be stored easily in relational databases."
61
- " Examples include images, videos, and social media posts."
62
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  elif data_type == "Semi-Structured Data":
65
  st.subheader("📄 Semi-Structured Data")
66
- st.write(
67
- "Semi-structured data falls between structured and unstructured data."
68
- " It contains tags and markers to separate data but does not fit into traditional databases."
69
- " Examples include JSON, XML, and YAML files."
70
- )
71
-
72
- elif data_type == "Time-Series Data":
73
- st.subheader("⏱️ Time-Series Data")
74
- st.write(
75
- "Time-series data represents information recorded at specific time intervals."
76
- " Examples include stock prices, weather data, and sensor readings."
77
- )
78
-
79
- elif data_type == "Spatial Data":
80
- st.subheader("🗺️ Spatial Data")
81
- st.write(
82
- "Spatial data represents the location and shape of objects in space."
83
- " Examples include GPS data, maps, and satellite imagery."
84
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  st.sidebar.success("Select a data type to learn more!")
 
43
  # Dropdown to select data type to learn about
44
  data_type = st.sidebar.selectbox(
45
  "Select a Type of Data:",
46
+ ["Structured Data", "Unstructured Data", "Semi-Structured Data"]
47
  )
48
 
49
  # Display information based on selected data type
50
  if data_type == "Structured Data":
51
  st.subheader("📋 Structured Data")
52
+ st.write("""
53
+ **Definition**: Structured data refers to data that is organized and stored in a predefined format like rows and columns, making it easily searchable and manageable.
54
+ It is highly organized, and each data point is placed into a defined structure.
55
+ """)
56
+
57
+ st.write("**Features**:")
58
+ st.markdown("""
59
+ - Fixed schema (e.g., tables with defined columns and data types).
60
+ - Easy to process and analyze using query languages like SQL.
61
+ - Relationships between data points are well-defined.
62
+ """)
63
+
64
+ st.write("**Examples of Structured Data**:")
65
+ st.markdown("""
66
+ 1. **Excel Files** 📊
67
+ 2. **MySQL Databases** 💾
68
+ """)
69
+
70
+
71
+ # Buttons for Structured Data Examples
72
+ if st.button("Show Excel Files 📂"):
73
+ st.subheader("Excel Files")
74
+ st.write("""
75
+ Excel files store structured data in rows and columns. They allow for easy calculations, analysis, and data manipulation using formulas or pivot tables.
76
+ Excel is a widely used tool in business, finance, and data analytics.
77
+ """)
78
+ # In the Structured Data page, add the following for the Excel button:
79
+ st.subheader("Excel Files")
80
+ st.write("""
81
+ **Excel** is a spreadsheet application developed by Microsoft. It stores structured data in rows and columns,
82
+ making it ideal for data analysis, calculations, and visualization.
83
+ **Key Features of Excel:**
84
+ - Store, analyze, and visualize data in tabular format.
85
+ - Support for formulas, functions, and pivot tables for advanced data manipulation.
86
+ - Integration with other applications and databases.
87
+ - Support for multiple sheets in a single workbook.
88
+ **Common Extensions:**
89
+ - `.xlsx` (default format for modern Excel)
90
+ - `.xls` (older format for Excel)
91
+ - `.csv` (Comma-Separated Values, compatible with Excel)
92
+ **How to Handle Excel Files in Python:**
93
+ Python provides libraries like `pandas` and `openpyxl` for reading, writing, and processing Excel files.
94
+ """)
95
+
96
+ st.write("### Convert Excel to CSV 📄")
97
+ st.code("""
98
+ import pandas as pd
99
+ # Convert a single Excel sheet to CSV
100
+ def excel_to_csv(excel_file, csv_file):
101
+ df = pd.read_excel(excel_file) # Read the Excel file
102
+ df.to_csv(csv_file, index=False) # Save as CSV
103
+ print(f"Excel file converted to {csv_file}")
104
+ # Example usage
105
+ excel_to_csv('input_file.xlsx', 'output_file.csv')
106
+ """, language="python")
107
+
108
+ st.write("### Convert Multiple Sheets to CSV 📄")
109
+ st.code("""
110
+ import pandas as pd
111
+ # Convert all sheets in an Excel file to separate CSV files
112
+ def excel_sheets_to_csv(excel_file, output_dir):
113
+ # Read all sheets
114
+ sheets = pd.read_excel(excel_file, sheet_name=None)
115
+ for sheet_name, data in sheets.items():
116
+ csv_file = f"{output_dir}/{sheet_name}.csv" # Name CSV files by sheet name
117
+ data.to_csv(csv_file, index=False)
118
+ print(f"Sheet '{sheet_name}' converted to {csv_file}")
119
+ # Example usage
120
+ excel_sheets_to_csv('input_file.xlsx', 'output_directory')
121
+ """, language="python")
122
+
123
+ # Placeholder button for GitHub link
124
+ if st.button("GitHub Link 🔗"):
125
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
126
+
127
+ # Optional: Add an animation for Excel
128
+ excel_animation_url = "https://assets9.lottiefiles.com/packages/lf20_ktn4ouly.json" # Example Lottie URL for Excel
129
+ excel_animation = load_lottie_url(excel_animation_url)
130
+ if excel_animation:
131
+ st_lottie(excel_animation, height=300, key="excel_animation")
132
+
133
+ if st.button("Show MySQL Databases 💻"):
134
+ st.subheader("MySQL Databases")
135
+ st.write("""
136
+ MySQL is a relational database management system that stores structured data in tables. SQL (Structured Query Language) is used to query and manipulate data in these databases.
137
+ It is commonly used in web applications and enterprise systems.
138
+ """)
139
+
140
+ st.write("""
141
+ **MySQL** is an open-source relational database management system (RDBMS) that stores structured data in tables.
142
+ It is widely used for managing and organizing data in web applications, enterprise systems, and data-driven projects.
143
+ **Key Features of MySQL:**
144
+ - High performance, scalability, and reliability.
145
+ - Support for SQL (Structured Query Language) for querying and managing data.
146
+ - Multi-user access and role-based permissions.
147
+ - Integration with multiple programming languages like Python, PHP, Java, etc.
148
+ **Common Use Cases:**
149
+ - Web application backends (e.g., WordPress, e-commerce platforms).
150
+ - Data analytics and reporting.
151
+ - Content management systems (CMS).
152
+
153
+ **MySQL Extensions:**
154
+ - `.sql`: Standard file extension for SQL database dumps.
155
+ - `.db`: Extension used by certain database systems but can also represent MySQL databases.
156
+ """)
157
+
158
+ st.write("""
159
+ ### Advantages of MySQL:
160
+ - Open-source and free to use.
161
+ - Cross-platform support (Windows, Linux, macOS).
162
+ - Regular updates and strong community support.
163
+ - Supports ACID compliance for data reliability.
164
+ ### Limitations of MySQL:
165
+ - Not as feature-rich as some enterprise-level database systems (e.g., Oracle, MS SQL Server).
166
+ - Limited support for advanced analytics and distributed databases.
167
+ """)
168
+
169
+ # Placeholder button for GitHub link
170
+ if st.button("GitHub Link 🔗"):
171
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
172
+
173
+ # Optional: Add an animation for MySQL
174
+ mysql_animation_url = "https://assets10.lottiefiles.com/packages/lf20_kq5msyia.json" # Example Lottie URL for MySQL
175
+ mysql_animation = load_lottie_url(mysql_animation_url)
176
+ if mysql_animation:
177
+ st_lottie(mysql_animation, height=300, key="mysql_animation")
178
+
179
+
180
 
181
  elif data_type == "Unstructured Data":
182
  st.subheader("🗂️ Unstructured Data")
183
+ st.write("""
184
+ **Definition**: Unstructured data lacks any predefined structure or schema, making it the most difficult to organize and analyze.
185
+ It is typically raw and needs advanced processing to extract insights.
186
+ """)
187
+
188
+ st.write("**Features**:")
189
+ st.markdown("""
190
+ - Free-form; not stored in a tabular format.
191
+ - Requires specialized tools like AI or machine learning to analyze.
192
+ """)
193
+
194
+ st.write("**Examples of Unstructured Data**:")
195
+ st.markdown("""
196
+ 1. **Images 🖼️**
197
+ 2. **Videos 🎥**
198
+ 3. **Audio 🔊**
199
+ 4. **Text 🖹**
200
+ """)
201
+
202
+ # Buttons for Unstructured Data Examples
203
+ if st.button("Show Image 📷"):
204
+ st.subheader("Working with Images")
205
+ st.write("""
206
+ **Images** are one of the most common forms of unstructured data. They are represented as a grid of pixels, each having color information (RGB or grayscale). Images are used in various domains such as computer vision, medical imaging, and entertainment.
207
+ **Common File Formats:**
208
+ - `.jpg` or `.jpeg` (Joint Photographic Experts Group)
209
+ - `.png` (Portable Network Graphics)
210
+ - `.bmp` (Bitmap Image File)
211
+ - `.tiff` (Tagged Image File Format)
212
+ """)
213
+
214
+ st.write("### Steps to Convert an Image into an Array 📊")
215
+ st.write("""
216
+ Converting an image into a numerical array is a key step in image processing. Here's how it's typically done:
217
+ 1. Load the image using an image processing library (e.g., OpenCV or PIL).
218
+ 2. Convert the image into a NumPy array.
219
+ 3. Access pixel data for analysis or manipulation.
220
+ """)
221
+
222
+ st.write("#### Example Code: Converting an Image into an Array")
223
+ st.code("""
224
+ import cv2
225
+ import numpy as np
226
+ # Load the image
227
+ image_path = 'image.jpg' # Path to the image
228
+ image = cv2.imread(image_path) # Load image as BGR format
229
+ # Convert to NumPy array
230
+ image_array = np.array(image)
231
+ # Display shape and pixel data
232
+ print("Image Shape:", image_array.shape) # (Height, Width, Channels)
233
+ print("Pixel Data (Top-left):", image_array[0, 0]) # Pixel value at (0, 0)
234
+ """, language="python")
235
+
236
+ # Placeholder button for GitHub link
237
+ if st.button("Jupyter Notebook 🔗 (Image)"):
238
+ st.markdown("https://colab.research.google.com/drive/1BxJuxD1mzeuDnPIjwc-06J_GIU2JgO1_?usp=sharing")
239
+
240
+ if st.button("Show Video 🎥"):
241
+ st.subheader("Working with Videos")
242
+ st.write("""
243
+ **Videos** are sequences of images (frames) that are displayed at a specific frame rate to create a moving picture. Videos are used in surveillance, entertainment, and machine learning applications like activity recognition and object detection.
244
+ **Common File Formats:**
245
+ - `.mp4` (MPEG-4 Part 14)
246
+ - `.avi` (Audio Video Interleave)
247
+ - `.mov` (QuickTime File Format)
248
+ - `.mkv` (Matroska Video File Format)
249
+ """)
250
+
251
+ st.write("### Steps to Convert a Video into Frames 📸")
252
+ st.write("""
253
+ Breaking a video into individual frames is an important step in video analysis. Here's how it's done:
254
+ 1. Load the video using a video processing library like OpenCV.
255
+ 2. Loop through each frame and save or process it.
256
+ 3. Save the frames as images for further processing.
257
+ """)
258
+
259
+ st.write("#### Example Code: Converting a Video into Frames")
260
+ st.code("""
261
+ import cv2
262
+ import os
263
+ # Load the video
264
+ video_path = 'video.mp4' # Path to the video
265
+ video = cv2.VideoCapture(video_path)
266
+ # Create a folder to store the frames
267
+ output_folder = 'frames'
268
+ os.makedirs(output_folder, exist_ok=True)
269
+ frame_number = 0
270
+ while True:
271
+ ret, frame = video.read() # Read the next frame
272
+ if not ret:
273
+ break # Exit if no frames are left
274
+ # Save the frame as an image
275
+ frame_path = os.path.join(output_folder, f'frame_{frame_number:04d}.jpg')
276
+ cv2.imwrite(frame_path, frame)
277
+ frame_number += 1
278
+ print(f"Extracted {frame_number} frames and saved to {output_folder}")
279
+ video.release()
280
+ """, language="python")
281
+
282
+ # Placeholder button for GitHub link
283
+ if st.button("GitHub Link 🔗 (Video)"):
284
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
285
+
286
+
287
+ if st.button("Show Audio 🔊"):
288
+ st.subheader("Audio")
289
+ st.write("""
290
+ Social media posts, such as tweets, Facebook updates, or Instagram images, represent unstructured data. They contain a mix of text, images, and metadata and require NLP (Natural Language Processing) for analysis.
291
+ """)
292
+
293
+ if st.button("Show Text 📝"):
294
+ st.subheader("Text")
295
+ st.write("""
296
+ Social media posts, such as tweets, Facebook updates, or Instagram images, represent unstructured data. They contain a mix of text, images, and metadata and require NLP (Natural Language Processing) for analysis.
297
+ """)
298
+
299
+ # Footer
300
+ st.write("This app provides a clear understanding of data and its various types, especially based on structure. 🎉")
301
+
302
 
303
  elif data_type == "Semi-Structured Data":
304
  st.subheader("📄 Semi-Structured Data")
305
+
306
+ st.write("""
307
+ **Definition**: Semi-structured data does not have a strict table-based format but is partially organized using tags, markers, or key-value pairs.
308
+ While it is more flexible than structured data, it still has some organizational components.
309
+ """)
310
+
311
+ st.write("**Features**:")
312
+ st.markdown("""
313
+ - Flexible schema; not bound to a rigid structure.
314
+ - Easier to manage than unstructured data but more complex than structured data.
315
+ """)
316
+
317
+ st.write("**Examples of Semi-Structured Data**:")
318
+ st.markdown("""
319
+ 1. **JSON Files** 📑
320
+ 2. **XML Files** 🌐
321
+ """)
322
+
323
+ # Buttons for Semi-Structured Data Examples
324
+ if st.button("Show JSON Files 📄"):
325
+ st.subheader("JSON Files")
326
+ st.write("""
327
+ **JSON (JavaScript Object Notation)** is a lightweight data-interchange format. It is easy for humans to read and write, and it is easy for machines to parse and generate. JSON is widely used to transmit data between a server and a web application.
328
+ **Key Features of JSON:**
329
+ - Stores data as key-value pairs.
330
+ - Supports nested structures, such as arrays and objects.
331
+ - Language-independent but derived from JavaScript.
332
+ **Common Use Cases:**
333
+ - API responses and requests in web development.
334
+ - Configuration files for applications.
335
+ - Data serialization and exchange in distributed systems.
336
+ **File Extension:**
337
+ - `.json`
338
+ **Advantages of JSON:**
339
+ - Lightweight and compact.
340
+ - Human-readable and easy to understand.
341
+ - Supported by most modern programming languages.
342
+ **Limitations of JSON:**
343
+ - Does not support comments.
344
+ - Less efficient for very large datasets compared to binary formats.
345
+ """)
346
+
347
+ st.write("### Python Example: Working with JSON 📄")
348
+ st.write("#### Reading a JSON File and Accessing Its Data")
349
+ st.code("""
350
+ import json
351
+ # Reading a JSON file
352
+ with open('data.json', 'r') as file:
353
+ data = json.load(file)
354
+ # Accessing data
355
+ print("Name:", data['name'])
356
+ print("Age:", data['age'])
357
+ """, language="python")
358
+
359
+ st.write("#### Writing Data to a JSON File")
360
+ st.code("""
361
+ # Writing data to a JSON file
362
+ new_data = {
363
+ "name": "John Doe",
364
+ "age": 30,
365
+ "city": "New York"
366
+ }
367
+ with open('output.json', 'w') as file:
368
+ json.dump(new_data, file, indent=4)
369
+ print("Data saved to output.json")
370
+ """, language="python")
371
+
372
+ # Placeholder button for GitHub link
373
+ if st.button("GitHub Link 🔗 (JSON)"):
374
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
375
+
376
+ # Optional: Add animation for JSON
377
+ json_animation_url = "https://assets9.lottiefiles.com/packages/lf20_9jdtwwzw.json" # Example Lottie URL for JSON
378
+ json_animation = load_lottie_url(json_animation_url)
379
+ if json_animation:
380
+ st_lottie(json_animation, height=300, key="json_animation")
381
+
382
+ # XML Button
383
+ if st.button("Show XML Files 📄"):
384
+ st.subheader("XML Files")
385
+ st.write("""
386
+ **XML (eXtensible Markup Language)** is a markup language designed to store and transport data. XML emphasizes simplicity, generality, and usability across the Internet.
387
+ **Key Features of XML:**
388
+ - Data is stored in a tree-like structure with nested elements.
389
+ - Customizable tags allow flexibility in representing data.
390
+ - Both human-readable and machine-readable.
391
+ **Common Use Cases:**
392
+ - Data interchange between systems.
393
+ - Configuration files for applications and servers.
394
+ - RSS feeds and web services (e.g., SOAP).
395
+ **File Extension:**
396
+ - `.xml`
397
+ **Advantages of XML:**
398
+ - Highly flexible and customizable.
399
+ - Self-descriptive and easy to understand.
400
+ - Widely supported in web and enterprise applications.
401
+ **Limitations of XML:**
402
+ - More verbose compared to JSON.
403
+ - Slower to parse and larger in size.
404
+ """)
405
+
406
+ st.write("### Python Example: Working with XML 📄")
407
+ st.write("#### Reading an XML File and Parsing Its Data")
408
+ st.code("""
409
+ import xml.etree.ElementTree as ET
410
+ # Parsing an XML file
411
+ tree = ET.parse('data.xml')
412
+ root = tree.getroot()
413
+ # Accessing data
414
+ for child in root:
415
+ print(child.tag, ":", child.text)
416
+ """, language="python")
417
+
418
+ st.write("#### Writing Data to an XML File")
419
+ st.code("""
420
+ import xml.etree.ElementTree as ET
421
+ # Creating an XML structure
422
+ root = ET.Element("person")
423
+ name = ET.SubElement(root, "name")
424
+ name.text = "John Doe"
425
+ age = ET.SubElement(root, "age")
426
+ age.text = "30"
427
+ # Writing to a file
428
+ tree = ET.ElementTree(root)
429
+ tree.write("output.xml")
430
+ print("Data saved to output.xml")
431
+ """, language="python")
432
+
433
+ # Placeholder button for GitHub link
434
+ if st.button("GitHub Link 🔗 (XML)"):
435
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
436
+
437
+ # Optional: Add animation for XML
438
+ xml_animation_url = "https://assets7.lottiefiles.com/packages/lf20_7ozhpxio.json" # Example Lottie URL for XML
439
+ xml_animation = load_lottie_url(xml_animation_url)
440
+ if xml_animation:
441
+ st_lottie(xml_animation, height=300, key="xml_animation")
442
 
443
  st.sidebar.success("Select a data type to learn more!")