Phani1008 committed on
Commit
f6ad9db
·
verified ·
1 Parent(s): c7bf09f

Create Types of Data.py

Browse files
Files changed (1) hide show
  1. pages/Types of Data.py +444 -0
pages/Types of Data.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_lottie import st_lottie
3
+ import requests
4
+
5
+ # Function to load Lottie animation from a URL
6
def load_lottie_url(url: str):
    """Fetch a Lottie animation definition from ``url``.

    Args:
        url: HTTP(S) address of a Lottie JSON file.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails,
        times out, answers with a non-200 status, or the body is not
        valid JSON. Callers treat ``None`` as "skip the animation".
    """
    try:
        # A timeout keeps an unresponsive host from blocking the page render.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Connection errors, timeouts and malformed URLs: the animation is
        # decorative, so degrade to "no animation" instead of crashing.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON (e.g. an HTML error page).
        return None
11
+
12
+ # Load animations using URLs
13
+ structured_animation_url = "https://assets10.lottiefiles.com/packages/lf20_4j6cnjjm.json" # Example URL for structured data
14
+ semi_structured_animation_url = "https://assets10.lottiefiles.com/packages/lf20_0fhcmhgf.json" # Example URL for semi-structured data
15
+ unstructured_animation_url = "https://assets10.lottiefiles.com/packages/lf20_rekwjvy0.json" # Example URL for unstructured data
16
+
17
+ # Sidebar navigation
18
+ st.sidebar.title("Navigation")
19
+ page = st.sidebar.radio("Choose a page", ["Home", "Structured Data", "Semi-Structured Data", "Unstructured Data"])
20
+
21
+ # Home Page: Overview of What is Data and Types of Data
22
+ if page == "Home":
23
+ st.title("Understanding Data and Its Types 🌐")
24
+
25
+ st.header("What is Data?")
26
+ st.write("""
27
+ **Data** refers to raw facts, figures, or information that can be collected, measured, and analyzed for specific purposes.
28
+ It serves as the foundation for generating insights, making decisions, and solving problems in various fields like business,
29
+ science, and technology. 🧠
30
+ """)
31
+
32
+ st.header("Types of Data πŸ“Š")
33
+ st.write("Data can exist in various forms depending on its source and nature. Common forms include:")
34
+ st.markdown("""
35
+ 1. **Structured Data**: Data organized in a predefined format, making it easily searchable and manageable.
36
+ 2. **Semi-Structured Data**: Data that does not have a strict schema but is partially organized using tags or markers.
37
+ 3. **Unstructured Data**: Data without any predefined structure, requiring specialized tools to analyze.
38
+ """)
39
+
40
+ # Structured Data Page
41
+ elif page == "Structured Data":
42
+ st.title("Structured Data πŸ“‹")
43
+ animation = load_lottie_url(structured_animation_url)
44
+ if animation:
45
+ st_lottie(animation, height=300, key="structured_animation")
46
+
47
+ st.write("""
48
+ **Definition**: Structured data refers to data that is organized and stored in a predefined format like rows and columns, making it easily searchable and manageable.
49
+ It is highly organized, and each data point is placed into a defined structure.
50
+ """)
51
+
52
+ st.write("**Features**:")
53
+ st.markdown("""
54
+ - Fixed schema (e.g., tables with defined columns and data types).
55
+ - Easy to process and analyze using query languages like SQL.
56
+ - Relationships between data points are well-defined.
57
+ """)
58
+
59
+ st.write("**Examples of Structured Data**:")
60
+ st.markdown("""
61
+ 1. **Excel Files** 📊
62
+ 2. **MySQL Databases** 💾
63
+ """)
64
+
65
+
66
+ # Buttons for Structured Data Examples
67
+ if st.button("Show Excel Files πŸ“‚"):
68
+ st.subheader("Excel Files")
69
+ st.write("""
70
+ Excel files store structured data in rows and columns. They allow for easy calculations, analysis, and data manipulation using formulas or pivot tables.
71
+ Excel is a widely used tool in business, finance, and data analytics.
72
+ """)
73
+ # Detailed Excel overview: key features, common file extensions, and Python tooling
74
+ st.subheader("Excel Files")
75
+ st.write("""
76
+ **Excel** is a spreadsheet application developed by Microsoft. It stores structured data in rows and columns,
77
+ making it ideal for data analysis, calculations, and visualization.
78
+ **Key Features of Excel:**
79
+ - Store, analyze, and visualize data in tabular format.
80
+ - Support for formulas, functions, and pivot tables for advanced data manipulation.
81
+ - Integration with other applications and databases.
82
+ - Support for multiple sheets in a single workbook.
83
+ **Common Extensions:**
84
+ - `.xlsx` (default format for modern Excel)
85
+ - `.xls` (older format for Excel)
86
+ - `.csv` (Comma-Separated Values, compatible with Excel)
87
+ **How to Handle Excel Files in Python:**
88
+ Python provides libraries like `pandas` and `openpyxl` for reading, writing, and processing Excel files.
89
+ """)
90
+
91
+ st.write("### Convert Excel to CSV πŸ“„")
92
+ st.code("""
93
+ import pandas as pd
94
+ # Convert a single Excel sheet to CSV
95
+ def excel_to_csv(excel_file, csv_file):
96
+ df = pd.read_excel(excel_file) # Read the Excel file
97
+ df.to_csv(csv_file, index=False) # Save as CSV
98
+ print(f"Excel file converted to {csv_file}")
99
+ # Example usage
100
+ excel_to_csv('input_file.xlsx', 'output_file.csv')
101
+ """, language="python")
102
+
103
+ st.write("### Convert Multiple Sheets to CSV πŸ“„")
104
+ st.code("""
105
+ import pandas as pd
106
+ # Convert all sheets in an Excel file to separate CSV files
107
+ def excel_sheets_to_csv(excel_file, output_dir):
108
+ # Read all sheets
109
+ sheets = pd.read_excel(excel_file, sheet_name=None)
110
+ for sheet_name, data in sheets.items():
111
+ csv_file = f"{output_dir}/{sheet_name}.csv" # Name CSV files by sheet name
112
+ data.to_csv(csv_file, index=False)
113
+ print(f"Sheet '{sheet_name}' converted to {csv_file}")
114
+ # Example usage
115
+ excel_sheets_to_csv('input_file.xlsx', 'output_directory')
116
+ """, language="python")
117
+
118
+ # Placeholder button for GitHub link
119
+ if st.button("GitHub Link πŸ”—"):
120
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
121
+
122
+ # Optional: Add an animation for Excel
123
+ excel_animation_url = "https://assets9.lottiefiles.com/packages/lf20_ktn4ouly.json" # Example Lottie URL for Excel
124
+ excel_animation = load_lottie_url(excel_animation_url)
125
+ if excel_animation:
126
+ st_lottie(excel_animation, height=300, key="excel_animation")
127
+
128
+ if st.button("Show MySQL Databases πŸ’»"):
129
+ st.subheader("MySQL Databases")
130
+ st.write("""
131
+ MySQL is a relational database management system that stores structured data in tables. SQL (Structured Query Language) is used to query and manipulate data in these databases.
132
+ It is commonly used in web applications and enterprise systems.
133
+ """)
134
+
135
+ st.write("""
136
+ **MySQL** is an open-source relational database management system (RDBMS) that stores structured data in tables.
137
+ It is widely used for managing and organizing data in web applications, enterprise systems, and data-driven projects.
138
+ **Key Features of MySQL:**
139
+ - High performance, scalability, and reliability.
140
+ - Support for SQL (Structured Query Language) for querying and managing data.
141
+ - Multi-user access and role-based permissions.
142
+ - Integration with multiple programming languages like Python, PHP, Java, etc.
143
+ **Common Use Cases:**
144
+ - Web application backends (e.g., WordPress, e-commerce platforms).
145
+ - Data analytics and reporting.
146
+ - Content management systems (CMS).
147
+
148
+ **MySQL Extensions:**
149
+ - `.sql`: Standard file extension for SQL database dumps.
150
+ - `.db`: Generic database file extension (typically used by SQLite and similar systems); MySQL does not use it natively and keeps InnoDB table data in `.ibd` files.
151
+ """)
152
+
153
+ st.write("""
154
+ ### Advantages of MySQL:
155
+ - Open-source and free to use.
156
+ - Cross-platform support (Windows, Linux, macOS).
157
+ - Regular updates and strong community support.
158
+ - Supports ACID compliance for data reliability.
159
+ ### Limitations of MySQL:
160
+ - Not as feature-rich as some enterprise-level database systems (e.g., Oracle, MS SQL Server).
161
+ - Limited support for advanced analytics and distributed databases.
162
+ """)
163
+
164
+ # Placeholder button for GitHub link
165
+ if st.button("GitHub Link πŸ”—"):
166
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
167
+
168
+ # Optional: Add an animation for MySQL
169
+ mysql_animation_url = "https://assets10.lottiefiles.com/packages/lf20_kq5msyia.json" # Example Lottie URL for MySQL
170
+ mysql_animation = load_lottie_url(mysql_animation_url)
171
+ if mysql_animation:
172
+ st_lottie(mysql_animation, height=300, key="mysql_animation")
173
+
174
+
175
+ # Semi-Structured Data Page
176
+ elif page == "Semi-Structured Data":
177
+ st.title("Semi-Structured Data 🧩")
178
+ animation = load_lottie_url(semi_structured_animation_url)
179
+ if animation:
180
+ st_lottie(animation, height=300, key="semi_structured_animation")
181
+
182
+ st.write("""
183
+ **Definition**: Semi-structured data does not have a strict table-based format but is partially organized using tags, markers, or key-value pairs.
184
+ While it is more flexible than structured data, it still has some organizational components.
185
+ """)
186
+
187
+ st.write("**Features**:")
188
+ st.markdown("""
189
+ - Flexible schema; not bound to a rigid structure.
190
+ - Easier to manage than unstructured data but more complex than structured data.
191
+ """)
192
+
193
+ st.write("**Examples of Semi-Structured Data**:")
194
+ st.markdown("""
195
+ 1. **JSON Files** 📑
196
+ 2. **XML Files** 🌐
197
+ """)
198
+
199
+ # Buttons for Semi-Structured Data Examples
200
+ if st.button("Show JSON Files πŸ“„"):
201
+ st.subheader("JSON Files")
202
+ st.write("""
203
+ **JSON (JavaScript Object Notation)** is a lightweight data-interchange format. It is easy for humans to read and write, and it is easy for machines to parse and generate. JSON is widely used to transmit data between a server and a web application.
204
+ **Key Features of JSON:**
205
+ - Stores data as key-value pairs.
206
+ - Supports nested structures, such as arrays and objects.
207
+ - Language-independent but derived from JavaScript.
208
+ **Common Use Cases:**
209
+ - API responses and requests in web development.
210
+ - Configuration files for applications.
211
+ - Data serialization and exchange in distributed systems.
212
+ **File Extension:**
213
+ - `.json`
214
+ **Advantages of JSON:**
215
+ - Lightweight and compact.
216
+ - Human-readable and easy to understand.
217
+ - Supported by most modern programming languages.
218
+ **Limitations of JSON:**
219
+ - Does not support comments.
220
+ - Less efficient for very large datasets compared to binary formats.
221
+ """)
222
+
223
+ st.write("### Python Example: Working with JSON πŸ“„")
224
+ st.write("#### Reading a JSON File and Accessing Its Data")
225
+ st.code("""
226
+ import json
227
+ # Reading a JSON file
228
+ with open('data.json', 'r') as file:
229
+ data = json.load(file)
230
+ # Accessing data
231
+ print("Name:", data['name'])
232
+ print("Age:", data['age'])
233
+ """, language="python")
234
+
235
+ st.write("#### Writing Data to a JSON File")
236
+ st.code("""
237
+ # Writing data to a JSON file
238
+ new_data = {
239
+ "name": "John Doe",
240
+ "age": 30,
241
+ "city": "New York"
242
+ }
243
+ with open('output.json', 'w') as file:
244
+ json.dump(new_data, file, indent=4)
245
+ print("Data saved to output.json")
246
+ """, language="python")
247
+
248
+ # Placeholder button for GitHub link
249
+ if st.button("GitHub Link πŸ”— (JSON)"):
250
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
251
+
252
+ # Optional: Add animation for JSON
253
+ json_animation_url = "https://assets9.lottiefiles.com/packages/lf20_9jdtwwzw.json" # Example Lottie URL for JSON
254
+ json_animation = load_lottie_url(json_animation_url)
255
+ if json_animation:
256
+ st_lottie(json_animation, height=300, key="json_animation")
257
+
258
+ # XML Button
259
+ if st.button("Show XML Files πŸ“„"):
260
+ st.subheader("XML Files")
261
+ st.write("""
262
+ **XML (eXtensible Markup Language)** is a markup language designed to store and transport data. XML emphasizes simplicity, generality, and usability across the Internet.
263
+ **Key Features of XML:**
264
+ - Data is stored in a tree-like structure with nested elements.
265
+ - Customizable tags allow flexibility in representing data.
266
+ - Both human-readable and machine-readable.
267
+ **Common Use Cases:**
268
+ - Data interchange between systems.
269
+ - Configuration files for applications and servers.
270
+ - RSS feeds and web services (e.g., SOAP).
271
+ **File Extension:**
272
+ - `.xml`
273
+ **Advantages of XML:**
274
+ - Highly flexible and customizable.
275
+ - Self-descriptive and easy to understand.
276
+ - Widely supported in web and enterprise applications.
277
+ **Limitations of XML:**
278
+ - More verbose compared to JSON.
279
+ - Slower to parse and larger in size.
280
+ """)
281
+
282
+ st.write("### Python Example: Working with XML πŸ“„")
283
+ st.write("#### Reading an XML File and Parsing Its Data")
284
+ st.code("""
285
+ import xml.etree.ElementTree as ET
286
+ # Parsing an XML file
287
+ tree = ET.parse('data.xml')
288
+ root = tree.getroot()
289
+ # Accessing data
290
+ for child in root:
291
+ print(child.tag, ":", child.text)
292
+ """, language="python")
293
+
294
+ st.write("#### Writing Data to an XML File")
295
+ st.code("""
296
+ import xml.etree.ElementTree as ET
297
+ # Creating an XML structure
298
+ root = ET.Element("person")
299
+ name = ET.SubElement(root, "name")
300
+ name.text = "John Doe"
301
+ age = ET.SubElement(root, "age")
302
+ age.text = "30"
303
+ # Writing to a file
304
+ tree = ET.ElementTree(root)
305
+ tree.write("output.xml")
306
+ print("Data saved to output.xml")
307
+ """, language="python")
308
+
309
+ # Placeholder button for GitHub link
310
+ if st.button("GitHub Link πŸ”— (XML)"):
311
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
312
+
313
+ # Optional: Add animation for XML
314
+ xml_animation_url = "https://assets7.lottiefiles.com/packages/lf20_7ozhpxio.json" # Example Lottie URL for XML
315
+ xml_animation = load_lottie_url(xml_animation_url)
316
+ if xml_animation:
317
+ st_lottie(xml_animation, height=300, key="xml_animation")
318
+
319
+
320
+ # Unstructured Data Page
321
+ elif page == "Unstructured Data":
322
+ st.title("Unstructured Data πŸ—‚οΈ")
323
+ animation = load_lottie_url(unstructured_animation_url)
324
+ if animation:
325
+ st_lottie(animation, height=300, key="unstructured_animation")
326
+
327
+ st.write("""
328
+ **Definition**: Unstructured data lacks any predefined structure or schema, making it the most difficult to organize and analyze.
329
+ It is typically raw and needs advanced processing to extract insights.
330
+ """)
331
+
332
+ st.write("**Features**:")
333
+ st.markdown("""
334
+ - Free-form; not stored in a tabular format.
335
+ - Requires specialized tools like AI or machine learning to analyze.
336
+ """)
337
+
338
+ st.write("**Examples of Unstructured Data**:")
339
+ st.markdown("""
340
+ 1. **Images 🖼️**
341
+ 2. **Videos 🎥**
342
+ 3. **Audio 🔊**
343
+ 4. **Text 🖹**
344
+ """)
345
+
346
+ # Buttons for Unstructured Data Examples
347
+ if st.button("Show Image πŸ“·"):
348
+ st.subheader("Working with Images")
349
+ st.write("""
350
+ **Images** are one of the most common forms of unstructured data. They are represented as a grid of pixels, each having color information (RGB or grayscale). Images are used in various domains such as computer vision, medical imaging, and entertainment.
351
+ **Common File Formats:**
352
+ - `.jpg` or `.jpeg` (Joint Photographic Experts Group)
353
+ - `.png` (Portable Network Graphics)
354
+ - `.bmp` (Bitmap Image File)
355
+ - `.tiff` (Tagged Image File Format)
356
+ """)
357
+
358
+ st.write("### Steps to Convert an Image into an Array πŸ“Š")
359
+ st.write("""
360
+ Converting an image into a numerical array is a key step in image processing. Here's how it's typically done:
361
+ 1. Load the image using an image processing library (e.g., OpenCV or PIL).
362
+ 2. Convert the image into a NumPy array.
363
+ 3. Access pixel data for analysis or manipulation.
364
+ """)
365
+
366
+ st.write("#### Example Code: Converting an Image into an Array")
367
+ st.code("""
368
+ import cv2
369
+ import numpy as np
370
+ # Load the image
371
+ image_path = 'image.jpg' # Path to the image
372
+ image = cv2.imread(image_path) # Load image as BGR format
373
+ # Convert to NumPy array
374
+ image_array = np.array(image)
375
+ # Display shape and pixel data
376
+ print("Image Shape:", image_array.shape) # (Height, Width, Channels)
377
+ print("Pixel Data (Top-left):", image_array[0, 0]) # Pixel value at (0, 0)
378
+ """, language="python")
379
+
380
+ # Placeholder button for GitHub link
381
+ if st.button("GitHub Link πŸ”— (Image)"):
382
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
383
+
384
+ if st.button("Show Video πŸŽ₯"):
385
+ st.subheader("Working with Videos")
386
+ st.write("""
387
+ **Videos** are sequences of images (frames) that are displayed at a specific frame rate to create a moving picture. Videos are used in surveillance, entertainment, and machine learning applications like activity recognition and object detection.
388
+ **Common File Formats:**
389
+ - `.mp4` (MPEG-4 Part 14)
390
+ - `.avi` (Audio Video Interleave)
391
+ - `.mov` (QuickTime File Format)
392
+ - `.mkv` (Matroska Video File Format)
393
+ """)
394
+
395
+ st.write("### Steps to Convert a Video into Frames πŸ“Έ")
396
+ st.write("""
397
+ Breaking a video into individual frames is an important step in video analysis. Here's how it's done:
398
+ 1. Load the video using a video processing library like OpenCV.
399
+ 2. Loop through each frame and save or process it.
400
+ 3. Save the frames as images for further processing.
401
+ """)
402
+
403
+ st.write("#### Example Code: Converting a Video into Frames")
404
+ st.code("""
405
+ import cv2
406
+ import os
407
+ # Load the video
408
+ video_path = 'video.mp4' # Path to the video
409
+ video = cv2.VideoCapture(video_path)
410
+ # Create a folder to store the frames
411
+ output_folder = 'frames'
412
+ os.makedirs(output_folder, exist_ok=True)
413
+ frame_number = 0
414
+ while True:
415
+ ret, frame = video.read() # Read the next frame
416
+ if not ret:
417
+ break # Exit if no frames are left
418
+ # Save the frame as an image
419
+ frame_path = os.path.join(output_folder, f'frame_{frame_number:04d}.jpg')
420
+ cv2.imwrite(frame_path, frame)
421
+ frame_number += 1
422
+ print(f"Extracted {frame_number} frames and saved to {output_folder}")
423
+ video.release()
424
+ """, language="python")
425
+
426
+ # Placeholder button for GitHub link
427
+ if st.button("GitHub Link πŸ”— (Video)"):
428
+ st.write("**GitHub Repository:** [Provide your GitHub link here]")
429
+
430
+
431
+ if st.button("Show Audio πŸ”Š"):
432
+ st.subheader("Audio")
433
+ st.write("""
434
+ Audio recordings, such as voice notes, podcasts, music tracks, or call-center conversations, represent unstructured data. They are stored as sampled waveforms in formats like `.wav` or `.mp3` and require signal processing or speech recognition to analyze.
435
+ """)
436
+
437
+ if st.button("Show Text πŸ“"):
438
+ st.subheader("Text")
439
+ st.write("""
440
+ Social media posts, such as tweets, Facebook updates, or Instagram images, represent unstructured data. They contain a mix of text, images, and metadata and require NLP (Natural Language Processing) for analysis.
441
+ """)
442
+
443
+ # Footer
444
+ st.write("This app provides a clear understanding of data and its various types, especially based on structure. πŸŽ‰")