Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -45,248 +45,6 @@ st.subheader("**2.1 Descriptive Statistics**")
|
|
| 45 |
st.markdown("""This Descriptive Statistics describe the main feature of data. This
|
| 46 |
descriptive statistics can be performed on sample data as well as population data. Some of
|
| 47 |
the key points of descriptive statistics are stated below.\n KEY COCEPTS\n * Measurement of Central Tendency which involves finding Mean, Median, and Mode.\n * Measurement of Dispersion which involves finding Range, Variance and Standard Deviation.\n * Distribution which gives how frequently the data is occurring some of examples of distribution are Gaussian, Random, and Normal distribution""")
|
| 48 |
-
st.subheader("Measure Of Central Tendency",divider=True)
|
| 49 |
-
st.markdown("""The measure of central tendency is used to find the central average value of the data.The central tendency can be computed by
|
| 50 |
-
useing three ways \n * Mode \n * Median \n * Mean""")
|
| 51 |
-
st.subheader("MODE",divider=True)
|
| 52 |
-
st.markdown("""Mode will be giving the centeral tendency based on most frequently occuring data.The major drawback of mode is its frequecy baised it
|
| 53 |
-
mostly focus on the data which is occuring most times.Here in this mode we might come across some situation's like """)
|
| 54 |
-
st.markdown(''':violet[No_Mode] \n Let's understand why this situation raises for example let's take list of numbers [1,2,3,4,5] here we don't have
|
| 55 |
-
frequency of numbers repeating in this senario we will come accross No_Mode situaton.
|
| 56 |
-
''')
|
| 57 |
-
st.markdown(''':violet[Uni_Mode] \n Let's understand why this situation raises for example let's take list of numbers [1,1,1,2,3,4,5]. here by
|
| 58 |
-
checking the list it will tend to know that the frequency of number 1 is more and it returns the value 1 as output.
|
| 59 |
-
''')
|
| 60 |
-
st.markdown(''':violet[Bi_Mode] \n Let's understand why this situation raises for example let's take list of numbers [1,1,2,2,3,4,5]. here by
|
| 61 |
-
checking the frequency in list we come across a situtaion where we will find two maximun frequecy repeated value hence the output will be Bi_Mode.
|
| 62 |
-
''')
|
| 63 |
-
st.markdown(''':violet[Tri_Mode] \n Let's understand why this situation raises for example let's take list of numbers [1,1,2,2,3,3,4,5]. here by
|
| 64 |
-
checking the frequency in list we come across a situtaion where we will find three maximun frequecy repeated value hence the output will be Tri_Mode.
|
| 65 |
-
''')
|
| 66 |
-
st.markdown(''':violet[Multi_Mode] \n Let's understand why this situation raises for example let's take list of numbers [1,1,2,2,3,3,4,4,5]. here by
|
| 67 |
-
checking the frequency in list we come across a situtaion where we will find more than three maximun frequecy repeated value hence the output will be Multi_Mode.
|
| 68 |
-
''')
|
| 69 |
-
st.title("Calculate Mode")
|
| 70 |
-
def mode(*args):
    """Classify the mode of the given values.

    Args:
        *args: Hashable values to analyse (the calculator passes integers).

    Returns:
        ``'no mode'`` when no values are given, when every value occurs exactly
        once, or when all distinct values share the same frequency.
        A one-item ``{value: frequency}`` dict when exactly one value is the
        most frequent. Otherwise ``'bi mode'``, ``'tri mode'`` or
        ``'multimode'`` when two, three, or more than three values tie for the
        highest frequency.
    """
    # Imported locally because the file's import section is outside this block.
    from collections import Counter

    if not args:
        # Without this guard max() below would raise ValueError on an empty tally.
        return 'no mode'

    # One pass over the data instead of calling list.count() per distinct value.
    frequencies = Counter(args)
    highest = max(frequencies.values())
    most_frequent = [value for value, freq in frequencies.items() if freq == highest]

    # Either nothing repeats, or everything repeats equally often.
    if highest == 1 or len(most_frequent) == len(frequencies):
        return 'no mode'
    if len(most_frequent) == 1:
        return {most_frequent[0]: highest}
    if len(most_frequent) == 2:
        return 'bi mode'
    if len(most_frequent) == 3:
        return 'tri mode'
    return 'multimode'
|
| 92 |
-
# Mode calculator: read a comma-separated list of integers and show its mode.
numbers_input = st.text_input("Enter a list of numbers separated by commas (e.g., 1, 2, 2, 3, 4):")

if numbers_input:
    try:
        # int() raises ValueError on any token that is not an integer.
        list1 = [int(token) for token in numbers_input.split(',')]
        result = mode(*list1)
        st.write("Mode result:", result)
    except ValueError:
        st.write("Please enter a valid list of numbers separated by commas.")
|
| 101 |
-
st.subheader("Median",divider=True)
|
| 102 |
-
st.markdown("""Median will also be giving the central tendency.But the major drawback of median is it prior foucus will be on the central value.
|
| 103 |
-
In order to find the mean first we have to sort the give list and based on the length of the list the formula are changed""")
|
| 104 |
-
st.subheader("Median Formula for Odd Number of Observations")
|
| 105 |
-
st.latex(r'''
|
| 106 |
-
\text{Median} = X_{\left(\frac{n+1}{2}\right)}
|
| 107 |
-
''')
|
| 108 |
-
st.subheader("Median Formula for Even Number of Observations")
|
| 109 |
-
st.latex(r'''
|
| 110 |
-
\text{Median} = \frac{X_{\left(\frac{n}{2}\right)} + X_{\left(\frac{n}{2}+1\right)}}{2}
|
| 111 |
-
''')
|
| 112 |
-
def median(list1):
    """Return the median of a non-empty list of numbers.

    The input list is left untouched; a sorted copy is used internally.

    Args:
        list1: Numbers to analyse.

    Returns:
        The middle element when the count is odd, otherwise the average of the
        two middle elements (a float, because of true division).

    Raises:
        ValueError: If ``list1`` is empty.
    """
    if not list1:
        raise ValueError("median requires at least one number")

    # sorted() instead of list.sort() so the caller's list is not reordered.
    ordered = sorted(list1)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2
|
| 122 |
-
# Median calculator: read a comma-separated list of integers and show its median.
st.title("Calculate Median")
numbers_input_1 = st.text_input("Enter a list of numbers separated by commas (e.g., 1, 2, 3, 4, 5):", key="numbers_input_1")
if numbers_input_1:
    list1 = []
    has_invalid_entry = False

    for num in numbers_input_1.split(','):
        num = num.strip()
        if not num:
            # Tolerate stray commas such as "1, 2,".
            continue
        # Parse with int() instead of filtering on str.isdigit(): isdigit()
        # rejects "-3", so negative entries used to be dropped silently, and it
        # accepts characters such as "²" that int() cannot convert.
        try:
            list1.append(int(num))
        except ValueError:
            has_invalid_entry = True

    if has_invalid_entry:
        # Same message the mode calculator shows for unparsable input.
        st.write("Please enter a valid list of numbers separated by commas.")
    elif list1:
        result = median(list1)
        st.write("Median result:", result)
    else:
        st.write("No valid numbers provided.")
|
| 138 |
-
st.subheader("Mean",divider=True)
|
| 139 |
-
st.markdown("""
|
| 140 |
-
Mean is one of the beautiful measurement of central tendency it invovles all the data present in it.The only drawback of mean is it is
|
| 141 |
-
effected by outliers.Based on the data we will compute the mean in three types""")
|
| 142 |
-
st.subheader("Arthmetic Mean",divider=True)
|
| 143 |
-
st.markdown("""Arthmetic Mean is used on data which have \n * Interval and Ratio Data \n * Symmetric Distributions \n * Data Without Outliers
|
| 144 |
-
""")
|
| 145 |
-
st.subheader("Population Mean Formula")
|
| 146 |
-
st.latex(r'''
|
| 147 |
-
\mu = \frac{1}{N} \sum_{i=1}^{N} x_i
|
| 148 |
-
''')
|
| 149 |
-
st.subheader("Sample Mean Formula")
|
| 150 |
-
st.latex(r'''
|
| 151 |
-
\bar{x} = \frac{1}{n} \sum_{i=1}^{n} x_i
|
| 152 |
-
''')
|
| 153 |
-
def arthamatic_mean(list1):
    """Return the arithmetic mean of a non-empty list of numbers.

    Args:
        list1: Numbers to average.

    Returns:
        The sum of the values divided by their count.

    Raises:
        ValueError: If ``list1`` is empty.
    """
    if not list1:
        raise ValueError("arithmetic mean requires at least one number")
    # The builtin sum() replaces a reduce() fold that also shadowed the name `sum`.
    return sum(list1) / len(list1)
|
| 156 |
-
# Arithmetic mean calculator: read a comma-separated list of integers and show its mean.
st.title("Calculate Arthmetic_Mean")
numbers_input_2 = st.text_input("Enter a list of numbers separated by commas (e.g., 1, 2, 3, 4, 5):", key="numbers_input_2")
if numbers_input_2:
    list1 = []
    has_invalid_entry = False

    for i in numbers_input_2.split(","):
        i = i.strip()
        if not i:
            # Tolerate stray commas such as "1, 2,".
            continue
        # Parse with int() instead of filtering on str.isdigit(): isdigit()
        # rejects "-3", so negative entries used to be dropped silently, and it
        # accepts characters such as "²" that int() cannot convert.
        try:
            list1.append(int(i))
        except ValueError:
            has_invalid_entry = True

    if has_invalid_entry:
        # Same message the mode calculator shows for unparsable input.
        st.write("Please enter a valid list of numbers separated by commas.")
    elif list1:
        result = arthamatic_mean(list1)
        st.write("Arthmetic_Mean", result)
    else:
        st.write("No valid numbers provided.")
|
| 170 |
-
st.subheader("Geometric Mean",divider=True)
|
| 171 |
-
st.markdown("""Geometric Mean is used on data which have \n * Multiplicative Data \n * Percentages and Rates \n * Normalized Data
|
| 172 |
-
""")
|
| 173 |
-
st.subheader("Geometric Mean for Population")
|
| 174 |
-
st.latex(r'''
|
| 175 |
-
\text{GM}_{\text{population}} = \left( \prod_{i=1}^{N} x_i \right)^{\frac{1}{N}}
|
| 176 |
-
''')
|
| 177 |
-
st.subheader("Geometric Mean for Sample")
|
| 178 |
-
st.latex(r'''
|
| 179 |
-
\text{GM}_{\text{sample}} = \left( \prod_{i=1}^{n} x_i \right)^{\frac{1}{n}}
|
| 180 |
-
''')
|
| 181 |
-
def geometric_mean(list1):
    """Return the geometric mean of non-negative numbers, rounded to 2 decimals.

    The mean is computed as ``exp(mean(log(x)))`` rather than by multiplying
    all values first, so a very large product cannot overflow when it is
    converted to float for the root.

    Args:
        list1: Non-negative numbers.

    Returns:
        The geometric mean as a float rounded to two decimal places; ``0.0`` if
        any value is zero.

    Raises:
        ValueError: If ``list1`` is empty or contains a negative number.
    """
    # Imported locally because the file's import section is outside this block.
    import math

    if not list1:
        raise ValueError("geometric mean requires at least one number")
    if any(value < 0 for value in list1):
        # A negative product under a fractional power yields a complex number.
        raise ValueError("geometric mean is not defined for negative numbers")
    if any(value == 0 for value in list1):
        # log(0) is undefined; the product (and therefore the mean) is zero.
        return 0.0

    log_mean = math.fsum(math.log(value) for value in list1) / len(list1)
    return round(math.exp(log_mean), 2)
|
| 184 |
-
# Geometric mean calculator: read a comma-separated list and show its geometric mean.
st.title("Calculate Geometric_Mean")
numbers_input_3 = st.text_input("Enter a list of numbers separated by commas (e.g., 1, 2, 3, 4, 5):", key="numbers_input_3")
if numbers_input_3:
    parts = numbers_input_3.split(",")
    # Keep only tokens made purely of digits; everything else is ignored.
    list1 = [int(token) for token in (piece.strip() for piece in parts) if token.isdigit()]
    if not list1:
        st.write("No valid numbers provided.")
    else:
        result = geometric_mean(list1)
        st.write("Geometric_Mean", result)
|
| 198 |
-
st.subheader("Harmonic Mean",divider=True)
|
| 199 |
-
st.markdown("""Harmonic Mean is used on data which have \n * Rates and Ratios \n * Data with Reciprocal Relationships
|
| 200 |
-
""")
|
| 201 |
-
st.subheader("Harmonic Mean for Population")
|
| 202 |
-
st.latex(r'''
|
| 203 |
-
\text{HM}_{\text{population}} = \frac{N}{\sum_{i=1}^{N} \frac{1}{x_i}}
|
| 204 |
-
''')
|
| 205 |
-
st.subheader("Harmonic Mean for Sample")
|
| 206 |
-
st.latex(r'''
|
| 207 |
-
\text{HM}_{\text{sample}} = \frac{n}{\sum_{i=1}^{n} \frac{1}{x_i}}
|
| 208 |
-
''')
|
| 209 |
-
def harmonic_mean(list1):
    """Return the harmonic mean of a list of numbers, rounded to 2 decimals.

    Computed as ``n / sum(1 / x)``. Every element contributes its reciprocal,
    including the first one.

    Args:
        list1: Numbers to average.

    Returns:
        The harmonic mean as a float rounded to two decimal places; ``0.0`` if
        any value is zero (the limit of the mean as that value approaches zero).

    Raises:
        ValueError: If ``list1`` is empty.
    """
    if not list1:
        raise ValueError("harmonic mean requires at least one number")
    if any(value == 0 for value in list1):
        # 1 / 0 is undefined; treat the mean as zero instead of crashing.
        return 0.0

    # A reduce() without an initial value would seed the total with the first
    # element itself rather than its reciprocal, so sum the reciprocals directly.
    reciprocal_total = sum(1 / value for value in list1)
    return round(len(list1) / reciprocal_total, 2)
|
| 212 |
-
# Harmonic mean calculator: read a comma-separated list and show its harmonic mean.
st.title("Calculate Harmonic_Mean")
numbers_input_4 = st.text_input("Enter a list of numbers separated by commas (e.g., 1, 2, 3, 4, 5):", key="numbers_input_4")
if numbers_input_4:
    parts = numbers_input_4.split(",")
    list1 = []
    for i in parts:
        i = i.strip()
        # Only tokens made purely of digits are kept; everything else is ignored.
        if i.isdigit():
            list1.append(int(i))
    if list1:
        result = harmonic_mean(list1)
        # Label fixed: this section previously printed "Geometric_Mean".
        st.write("Harmonic_Mean", result)
    else:
        st.write("No valid numbers provided.")
|
| 226 |
-
st.subheader("Measure Of Disperssion ",divider=True)
|
| 227 |
-
st.markdown("""Measure Of Disperssion will give spread of our collected data around the central value.It's classifed into two types
|
| 228 |
-
""")
|
| 229 |
-
st.markdown(''':violet[Absolute Measure] \n absolute will give the spread of data in one unit.for example if the given data is in 'cm'
|
| 230 |
-
the output will be in cm''')
|
| 231 |
-
st.markdown(''':violet[Relative Measure] \n Relative will be free from unit's''')
|
| 232 |
-
st.header("**Absolute Measure**")
|
| 233 |
-
st.subheader("Range",divider=True)
|
| 234 |
-
st.subheader("Quartile Deviation",divider=True)
|
| 235 |
-
st.subheader("Varience",divider=True)
|
| 236 |
-
st.subheader("Standard Deviation",divider=True)
|
| 237 |
-
st.header("**Relative Measure**")
|
| 238 |
-
st.subheader("Coefficent Of Range",divider=True)
|
| 239 |
-
st.subheader("Coefficent Of Quartile Deviation",divider=True)
|
| 240 |
-
st.subheader("Coefficent Of Varience",divider=True)
|
| 241 |
-
st.subheader("Coefficent Of Standard Deviation",divider=True)
|
| 242 |
-
st.markdown(''':orange[**Range**] is one of the measure to find the disperssion.But is not at all mostly used beause it don't focus on the entire data.
|
| 243 |
-
''')
|
| 244 |
-
st.subheader("Absolute Range")
|
| 245 |
-
st.latex(r'''
|
| 246 |
-
\text{Absolute Range} = \text{Maximum Value} - \text{Minimum Value}
|
| 247 |
-
''')
|
| 248 |
-
st.subheader("Relative Range")
|
| 249 |
-
st.latex(r'''
|
| 250 |
-
\text{Relative Range} = \frac{\text{Absolute Range}}{\text{Mean}} \times 100
|
| 251 |
-
''')
|
| 252 |
-
st.markdown(''':orange[**Quartile Deviation**] is one of the measure to find the disperssion.In this type the data is divided into 4 equal parts.
|
| 253 |
-
It will mostly focus on the central data.
|
| 254 |
-
''')
|
| 255 |
-
st.subheader("Absolute Quartile Deviation")
|
| 256 |
-
st.latex(r'''
|
| 257 |
-
QD = \frac{Q3 - Q1}{2}
|
| 258 |
-
''')
|
| 259 |
-
st.subheader("Relative Quartile Deviation")
|
| 260 |
-
st.latex(r'''
|
| 261 |
-
\text{Relative QD} = \frac{Q3 - Q1}{Q3 + Q1} \times 100
|
| 262 |
-
''')
|
| 263 |
-
st.markdown(''':orange[**Varience**] is one of the measure to find the disperssion.It is one of the best measure to find the disperssion.The only
|
| 264 |
-
drawback is when in Varience is in order to overcome negitive value we square them thus the distance is doubled
|
| 265 |
-
''')
|
| 266 |
-
st.subheader("Absolute Variance")
|
| 267 |
-
st.latex(r'''
|
| 268 |
-
\text{Var} = \frac{1}{N} \sum_{i=1}^{N} (x_i - \bar{x})^2
|
| 269 |
-
''')
|
| 270 |
-
st.subheader("Relative Variance")
|
| 271 |
-
st.latex(r'''
|
| 272 |
-
\text{Relative Var} = \frac{\text{Var}}{\bar{x}} \times 100
|
| 273 |
-
''')
|
| 274 |
-
st.markdown(''':orange[**Standard Deviation**] is one of the measure to find the disperssion.It is one of the best measure to find the disperssion.It over comes the
|
| 275 |
-
disadvantage occured in varience by square rooting it.
|
| 276 |
-
''')
|
| 277 |
-
st.subheader("Absolute Standard Deviation")
|
| 278 |
-
st.latex(r'''
|
| 279 |
-
\sigma = \sqrt{\frac{1}{N} \sum_{i=1}^{N} (x_i - \bar{x})^2}
|
| 280 |
-
''')
|
| 281 |
-
st.subheader("Relative Standard Deviation")
|
| 282 |
-
st.latex(r'''
|
| 283 |
-
\text{Relative SD} = \frac{\sigma}{\bar{x}}
|
| 284 |
-
''')
|
| 285 |
-
st.subheader("Distribution",divider=True)
|
| 286 |
-
st.markdown(''':blue[**Distribution**] is a measure will will tell how the shape of data or in which shape the data is spread.It will help in
|
| 287 |
-
analysis.There are few types of distribution \n * Normal Distribution \n * Uniform Distribution \n * Binomial Distribution \n * Poisson Distribution
|
| 288 |
-
\n * Exponential Distribution \n * Chi-Square Distribution \n * T-Distribution
|
| 289 |
-
''')
|
| 290 |
st.subheader("**2.2 Inferential Statistics**")
|
| 291 |
st.markdown("""This Inferential Statistics will describe the population based
|
| 292 |
on a sample data. This statistics will give predictions about a population based on sample.
|
|
|
|
| 45 |
st.markdown("""This Descriptive Statistics describe the main feature of data. This
|
| 46 |
descriptive statistics can be performed on sample data as well as population data. Some of
|
| 47 |
the key points of descriptive statistics are stated below.\n KEY COCEPTS\n * Measurement of Central Tendency which involves finding Mean, Median, and Mode.\n * Measurement of Dispersion which involves finding Range, Variance and Standard Deviation.\n * Distribution which gives how frequently the data is occurring some of examples of distribution are Gaussian, Random, and Normal distribution""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
st.subheader("**2.2 Inferential Statistics**")
|
| 49 |
st.markdown("""This Inferential Statistics will describe the population based
|
| 50 |
on a sample data. This statistics will give predictions about a population based on sample.
|