Spaces:
Sleeping
Sleeping
Update pages/Data Collection.py
Browse files- pages/Data Collection.py +309 -5
pages/Data Collection.py
CHANGED
|
@@ -1979,16 +1979,320 @@ elif st.session_state.current_page == "explore_csv":
|
|
| 1979 |
#--------------------------------------------------------- Json --------------------------------------------------------------------------------
|
| 1980 |
|
| 1981 |
|
| 1982 |
-
|
| 1983 |
elif st.session_state.current_page == "explore_json":
|
| 1984 |
st.markdown("""
|
| 1985 |
-
<
|
| 1986 |
""", unsafe_allow_html=True)
|
|
|
|
| 1987 |
st.write("""
|
| 1988 |
-
JSON
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1989 |
""")
|
| 1990 |
-
|
| 1991 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1992 |
|
| 1993 |
|
| 1994 |
#--------------------------------------------------------- XML -------------------------------------------------------------------------------
|
|
|
|
| 1979 |
#--------------------------------------------------------- Json --------------------------------------------------------------------------------
|
| 1980 |
|
| 1981 |
|
|
|
|
| 1982 |
elif st.session_state.current_page == "explore_json":
|
| 1983 |
st.markdown("""
|
| 1984 |
+
<h2 style="color: #BB3385;">JavaScript Object Notation (JSON)</h2>
|
| 1985 |
""", unsafe_allow_html=True)
|
| 1986 |
+
|
| 1987 |
st.write("""
|
| 1988 |
+
- **JSON (JavaScript Object Notation)** is a lightweight data-interchange format.
|
| 1989 |
+
- It is easy for humans to read and write, and easy for machines to parse and generate.
|
| 1990 |
+
- JSON is used to represent data as key-value pairs and supports hierarchical structures.
|
| 1991 |
+
- Commonly used for:
|
| 1992 |
+
- Web APIs for sending and receiving data.
|
| 1993 |
+
- Configuration files.
|
| 1994 |
+
- Storing structured and semi-structured data.
|
| 1995 |
""")
|
| 1996 |
+
|
| 1997 |
+
st.markdown("""
|
| 1998 |
+
<h3 style="color: #5b2c6f;">Default JSON Format</h3>
|
| 1999 |
+
""", unsafe_allow_html=True)
|
| 2000 |
+
|
| 2001 |
+
st.write("""
|
| 2002 |
+
- JSON format is similar to a Python dictionary with key-value pairs.
|
| 2003 |
+
- The main difference between JSON and a Python dictionary is:
|
| 2004 |
+
- **In JSON**:
|
| 2005 |
+
- Keys must be in string format.
|
| 2006 |
+
- Values can be of various types (e.g., strings, numbers, arrays, objects).
|
| 2007 |
+
- **In Python Dictionary**:
|
| 2008 |
+
- Keys can be any hashable type (e.g., strings, numbers, tuples).
|
| 2009 |
+
""")
|
| 2010 |
+
|
| 2011 |
+
st.markdown("""
|
| 2012 |
+
<h4 style="color: #2a52be;">Example</h4>
|
| 2013 |
+
""", unsafe_allow_html=True)
|
| 2014 |
+
|
| 2015 |
+
st.code("""
|
| 2016 |
+
# JSON Format
|
| 2017 |
+
{
|
| 2018 |
+
"name": ["a", "b", "c"],
|
| 2019 |
+
"age": [11, 12, 13]
|
| 2020 |
+
}
|
| 2021 |
+
""", language="json")
|
| 2022 |
+
|
| 2023 |
+
st.code("""
|
| 2024 |
+
# Python Dictionary
|
| 2025 |
+
{
|
| 2026 |
+
"name": ["a", "b", "c"],
|
| 2027 |
+
"age": [11, 12, 13]
|
| 2028 |
+
}
|
| 2029 |
+
""", language="python")
|
| 2030 |
+
|
| 2031 |
+
st.markdown("""
|
| 2032 |
+
<h3 style="color: #5b2c6f;">JSON in Structured Data</h3>
|
| 2033 |
+
""", unsafe_allow_html=True)
|
| 2034 |
+
|
| 2035 |
+
st.write("""
|
| 2036 |
+
- JSON is considered structured when it has a consistent format with uniform key-value pairs for all entries.
|
| 2037 |
+
- This allows direct conversion into a tabular format, such as a DataFrame.
|
| 2038 |
+
""")
|
| 2039 |
+
|
| 2040 |
+
st.code("""
|
| 2041 |
+
# Example of Structured JSON
|
| 2042 |
+
|
| 2043 |
+
[
|
| 2044 |
+
{ "Id": 100, "Name": "Lakshmi Harika", "Age": 22, "Gender": "Female" },
|
| 2045 |
+
{ "Id": 101, "Name": "Varshitha", "Age": 23, "Gender": "Female" },
|
| 2046 |
+
{ "Id": 102, "Name": "Hari Chandan", "Age": 22, "Gender": "Male" },
|
| 2047 |
+
{ "Id": 103, "Name": "Shamitha", "Age": 23, "Gender": "Female" }
|
| 2048 |
+
]
|
| 2049 |
+
""", language="json")
|
| 2050 |
+
|
| 2051 |
+
st.code("""
|
| 2052 |
+
|
| 2053 |
+
# Reading a structured JSON file
|
| 2054 |
+
df = pd.read_json('structured_data.json')
|
| 2055 |
+
print(df)
|
| 2056 |
+
""", language="python")
|
| 2057 |
+
|
| 2058 |
+
st.markdown("""
|
| 2059 |
+
<h3 style="color: #5b2c6f;">JSON Orientations in Structured Data</h3>
|
| 2060 |
+
""", unsafe_allow_html=True)
|
| 2061 |
+
|
| 2062 |
+
st.write("""
|
| 2063 |
+
- JSON can represent data in various orientations using the `orient` parameter in `DataFrame.to_json()` or `pandas.read_json()`.""")
|
| 2064 |
+
|
| 2065 |
+
st.markdown("""
|
| 2066 |
+
<h4 style="color: #2a52be;">JSON with Orient = 'index'</h4>
|
| 2067 |
+
""", unsafe_allow_html=True)
|
| 2068 |
+
|
| 2069 |
+
st.write("""
|
| 2070 |
+
- When **`orient='index'`**:
|
| 2071 |
+
- In this format, keys represent row indices, and the values are dictionaries of column names and their respective data.
|
| 2072 |
+
- It is useful when the data is naturally indexed.
|
| 2073 |
+
""")
|
| 2074 |
+
|
| 2075 |
+
st.code("""
|
| 2076 |
+
|
| 2077 |
+
# Example of JSON with orient='index'
|
| 2078 |
+
{
|
| 2079 |
+
"0": { "Id": 100, "Name": "Lakshmi Harika", "Age": 22, "Gender": "Female" },
|
| 2080 |
+
"1": { "Id": 101, "Name": "Varshitha", "Age": 23, "Gender": "Female" },
|
| 2081 |
+
"2": { "Id": 102, "Name": "Hari Chandan", "Age": 22, "Gender": "Male" },
|
| 2082 |
+
"3": { "Id": 103, "Name": "Shamitha", "Age": 23, "Gender": "Female" }
|
| 2083 |
+
}
|
| 2084 |
+
""", language="json")
|
| 2085 |
+
|
| 2086 |
+
st.code("""
|
| 2087 |
+
|
| 2088 |
+
# Creating a DataFrame
|
| 2089 |
+
data = pd.DataFrame({
|
| 2090 |
+
"Id": [100, 101, 102, 103],
|
| 2091 |
+
"Name": ["Lakshmi Harika", "Varshitha", "Hari Chandan", "Shamitha"],
|
| 2092 |
+
"Age": [22, 23, 22, 23],
|
| 2093 |
+
"Gender": ["Female", "Female", "Male", "Female"]
|
| 2094 |
+
})
|
| 2095 |
+
|
| 2096 |
+
# Exporting to JSON with orient='index'
|
| 2097 |
+
json_data = data.to_json(orient='index')
|
| 2098 |
+
print(json_data)
|
| 2099 |
+
|
| 2100 |
+
# Reading back from JSON with orient='index'
|
| 2101 |
+
df = pd.read_json(json_data, orient='index')
|
| 2102 |
+
print(df)
|
| 2103 |
+
""", language="python")
|
| 2104 |
+
|
| 2105 |
+
|
| 2106 |
+
st.markdown("""
|
| 2107 |
+
<h4 style="color: #2a52be;">JSON with Orient = 'columns'</h4>
|
| 2108 |
+
""", unsafe_allow_html=True)
|
| 2109 |
+
|
| 2110 |
+
st.write("""
|
| 2111 |
+
- When **`orient='columns'`**:
|
| 2112 |
+
- Keys represent column names, and the values are dictionaries where each key is the row index, and the value is the data.
|
| 2113 |
+
- This is the default orientation when exporting DataFrames to JSON.
|
| 2114 |
+
""")
|
| 2115 |
+
|
| 2116 |
+
st.code("""
|
| 2117 |
+
|
| 2118 |
+
# Example of JSON with orient='columns'
|
| 2119 |
+
{
|
| 2120 |
+
"Id": { "0": 100, "1": 101, "2": 102, "3": 103 },
|
| 2121 |
+
"Name": { "0": "Lakshmi Harika", "1": "Varshitha", "2": "Hari Chandan", "3": "Shamitha" },
|
| 2122 |
+
"Age": { "0": 22, "1": 23, "2": 22, "3": 23 },
|
| 2123 |
+
"Gender": { "0": "Female", "1": "Female", "2": "Male", "3": "Female" }
|
| 2124 |
+
}
|
| 2125 |
+
""", language="json")
|
| 2126 |
+
|
| 2127 |
+
st.code("""
|
| 2128 |
+
|
| 2129 |
+
# Creating a DataFrame
|
| 2130 |
+
data = pd.DataFrame({
|
| 2131 |
+
"Id": [100, 101, 102, 103],
|
| 2132 |
+
"Name": ["Lakshmi Harika", "Varshitha", "Hari Chandan", "Shamitha"],
|
| 2133 |
+
"Age": [22, 23, 22, 23],
|
| 2134 |
+
"Gender": ["Female", "Female", "Male", "Female"]
|
| 2135 |
+
})
|
| 2136 |
+
|
| 2137 |
+
# Exporting to JSON with orient='columns'
|
| 2138 |
+
json_data = data.to_json(orient='columns')
|
| 2139 |
+
print(json_data)
|
| 2140 |
+
|
| 2141 |
+
# Reading back from JSON with orient='columns'
|
| 2142 |
+
df = pd.read_json(json_data, orient='columns')
|
| 2143 |
+
print(df)
|
| 2144 |
+
""", language="python")
|
| 2145 |
+
|
| 2146 |
+
st.markdown("""
|
| 2147 |
+
<h4 style="color: #2a52be;">JSON with Orient = 'values'</h4>
|
| 2148 |
+
""", unsafe_allow_html=True)
|
| 2149 |
+
|
| 2150 |
+
st.write("""
|
| 2151 |
+
- When **`orient='values'`**:
|
| 2152 |
+
- The JSON represents the data as an array of arrays.
|
| 2153 |
+
- Each inner array corresponds to a row of data, and the order matches the DataFrame’s column order.
|
| 2154 |
+
""")
|
| 2155 |
+
|
| 2156 |
+
st.code("""
|
| 2157 |
+
# Example of JSON with orient='values'
|
| 2158 |
+
[
|
| 2159 |
+
[100, "Lakshmi Harika", 22, "Female"],
|
| 2160 |
+
[101, "Varshitha", 23, "Female"],
|
| 2161 |
+
[102, "Hari Chandan", 22, "Male"],
|
| 2162 |
+
[103, "Shamitha", 23, "Female"]
|
| 2163 |
+
]
|
| 2164 |
+
""", language="json")
|
| 2165 |
+
|
| 2166 |
+
st.code("""
|
| 2167 |
+
|
| 2168 |
+
# Creating a DataFrame
|
| 2169 |
+
data = pd.DataFrame({
|
| 2170 |
+
"Id": [100, 101, 102, 103],
|
| 2171 |
+
"Name": ["Lakshmi Harika", "Varshitha", "Hari Chandan", "Shamitha"],
|
| 2172 |
+
"Age": [22, 23, 22, 23],
|
| 2173 |
+
"Gender": ["Female", "Female", "Male", "Female"]
|
| 2174 |
+
})
|
| 2175 |
+
|
| 2176 |
+
# Exporting to JSON with orient='values'
|
| 2177 |
+
json_data = data.to_json(orient='values')
|
| 2178 |
+
print(json_data)
|
| 2179 |
+
|
| 2180 |
+
# Reading back from JSON with orient='values'
|
| 2181 |
+
df = pd.read_json(json_data, orient='values')
|
| 2182 |
+
print(df)
|
| 2183 |
+
""", language="python")
|
| 2184 |
+
|
| 2185 |
+
st.markdown("""
|
| 2186 |
+
<h4 style="color: #2a52be;">JSON with Orient = 'split'</h4>
|
| 2187 |
+
""", unsafe_allow_html=True)
|
| 2188 |
+
|
| 2189 |
+
st.write("""
|
| 2190 |
+
- When **`orient='split'`**:
|
| 2191 |
+
- The JSON structure splits the data into three parts:
|
| 2192 |
+
1. `index`: Contains the row indices.
|
| 2193 |
+
2. `columns`: Contains the column names.
|
| 2194 |
+
3. `data`: Contains the actual data as a 2D array.
|
| 2195 |
+
- This orientation is useful for reconstructing the original DataFrame structure.
|
| 2196 |
+
""")
|
| 2197 |
+
|
| 2198 |
+
st.code("""
|
| 2199 |
+
# Example of JSON with orient='split'
|
| 2200 |
+
{
|
| 2201 |
+
"index": [0, 1, 2, 3],
|
| 2202 |
+
"columns": ["Id", "Name", "Age", "Gender"],
|
| 2203 |
+
"data": [
|
| 2204 |
+
[100, "Lakshmi Harika", 22, "Female"],
|
| 2205 |
+
[101, "Varshitha", 23, "Female"],
|
| 2206 |
+
[102, "Hari Chandan", 22, "Male"],
|
| 2207 |
+
[103, "Shamitha", 23, "Female"]
|
| 2208 |
+
]
|
| 2209 |
+
}
|
| 2210 |
+
""", language="json")
|
| 2211 |
+
|
| 2212 |
+
st.code("""
|
| 2213 |
+
|
| 2214 |
+
# Creating a DataFrame
|
| 2215 |
+
data = pd.DataFrame({
|
| 2216 |
+
"Id": [100, 101, 102, 103],
|
| 2217 |
+
"Name": ["Lakshmi Harika", "Varshitha", "Hari Chandan", "Shamitha"],
|
| 2218 |
+
"Age": [22, 23, 22, 23],
|
| 2219 |
+
"Gender": ["Female", "Female", "Male", "Female"]
|
| 2220 |
+
})
|
| 2221 |
+
|
| 2222 |
+
# Exporting to JSON with orient='split'
|
| 2223 |
+
json_data = data.to_json(orient='split')
|
| 2224 |
+
print(json_data)
|
| 2225 |
+
|
| 2226 |
+
# Reading back from JSON with orient='split'
|
| 2227 |
+
df = pd.read_json(json_data, orient='split')
|
| 2228 |
+
print(df)
|
| 2229 |
+
""", language="python")
|
| 2230 |
+
|
| 2231 |
+
st.markdown("""
|
| 2232 |
+
<h3 style="color: #5b2c6f;">JSON in Semi-Structured Data</h3>
|
| 2233 |
+
""", unsafe_allow_html=True)
|
| 2234 |
+
|
| 2235 |
+
st.write("""
|
| 2236 |
+
- If the JSON file is in semi-structured format, we can use `pd.json_normalize()` to convert it into a DataFrame.
|
| 2237 |
+
- **Semi-Structured JSON**:
|
| 2238 |
+
- A JSON structure is considered semi-structured when one or multiple columns contain data in the form of lists of dictionaries.
|
| 2239 |
+
- This format requires flattening or normalization to be converted into a tabular structure.
|
| 2240 |
+
- When using `pd.json_normalize()`, ensure the data is already parsed into a **dictionary or a list of dictionaries** (not a raw JSON string or file path); otherwise, it will throw an error.
|
| 2241 |
+
""")
|
| 2242 |
+
|
| 2243 |
+
st.code("""
|
| 2244 |
+
# Example Nested JSON
|
| 2245 |
+
x = [
|
| 2246 |
+
{"name": "Lakshmi Harika", "age": 23, "gender": "f", "marks": [{"maths": 75, "English": 82}]},
|
| 2247 |
+
{"name": "Varshitha", "age": 43, "gender": "f", "marks": [{"maths": 65, "English": 72}]},
|
| 2248 |
+
{"name": "Hari Chandan", "age": 28, "gender": "m", "marks": [{"maths": 85, "English": 92}]},
|
| 2249 |
+
{"name": "Shamitha", "age": 21, "gender": "f", "marks": [{"maths": 90, "English": 88}]}
|
| 2250 |
+
]
|
| 2251 |
+
""", language="python")
|
| 2252 |
+
|
| 2253 |
+
st.markdown("""
|
| 2254 |
+
<h4 style="color: #5b2c6f;">Parameters to Understand</h4>
|
| 2255 |
+
""", unsafe_allow_html=True)
|
| 2256 |
+
|
| 2257 |
+
st.write("""
|
| 2258 |
+
1. **record_path**:
|
| 2259 |
+
- Specifies the path to nested lists or dictionaries that need to be flattened.
|
| 2260 |
+
- Example: For the key `marks`, the path would be `'marks'`.
|
| 2261 |
+
|
| 2262 |
+
2. **meta**:
|
| 2263 |
+
- Specifies fields to include as metadata in the resulting DataFrame.
|
| 2264 |
+
- These fields remain unchanged and are added to the resulting DataFrame.
|
| 2265 |
+
|
| 2266 |
+
3. **max_level**:
|
| 2267 |
+
- Controls the depth of the flattening.
|
| 2268 |
+
- Default is `None` (flattens everything), but setting it to a specific number limits the depth.
|
| 2269 |
+
""")
|
| 2270 |
+
|
| 2271 |
+
st.code("""
|
| 2272 |
+
# Example of Semi-Structured JSON
|
| 2273 |
+
|
| 2274 |
+
df = pd.json_normalize(
|
| 2275 |
+
x,
|
| 2276 |
+
record_path=['marks'], # Specifies the nested list to flatten
|
| 2277 |
+
meta=['name', 'age', 'gender'], # Fields to include as metadata
|
| 2278 |
+
max_level=1 # Specifies the depth of flattening
|
| 2279 |
+
)
|
| 2280 |
+
print(df)
|
| 2281 |
+
""", language="python")
|
| 2282 |
+
|
| 2283 |
+
st.write("""
|
| 2284 |
+
The resulting DataFrame will look like this:
|
| 2285 |
+
""")
|
| 2286 |
+
|
| 2287 |
+
st.table({
|
| 2288 |
+
"maths": [75, 65, 85, 90],
|
| 2289 |
+
"English": [82, 72, 92, 88],
|
| 2290 |
+
"name": ["Lakshmi Harika", "Varshitha", "Hari Chandan", "Shamitha"],
|
| 2291 |
+
"age": [23, 43, 28, 21],
|
| 2292 |
+
"gender": ["f", "f", "m", "f"]
|
| 2293 |
+
})
|
| 2294 |
+
|
| 2295 |
+
|
| 2296 |
|
| 2297 |
|
| 2298 |
#--------------------------------------------------------- XML -------------------------------------------------------------------------------
|