# GlassWalker's picture
# Update app.py
# b6f6d95
import joblib
import xgboost
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np
import requests
from time import sleep
from random import randint
import re
from lxml import etree
from urllib.request import urlopen
import gradio as gr
# City list backing the UI. Each row gets a "city-state" label made of the
# "name" and "state" columns joined by a hyphen.
uscities = pd.read_csv("final_cities_list.csv").assign(
    **{"city-state": lambda cities: cities["name"] + "-" + cities["state"]}
)
# Seconds to wait on each city-data.com request before treating it as a failure.
_REQUEST_TIMEOUT_SECONDS = 30


def scrape_city_data(town_name):
    """Scrape the model's input features for one city from city-data.com.

    Two pages are fetched: the main city page and the poverty page. Each
    feature is pulled out of a specific element (looked up by ``id``) with a
    regular expression.

    Args:
        town_name: City identifier as it appears in city-data.com URLs
            (this file passes "<name>-<state>" labels).

    Returns:
        A one-row ``DataFrame`` with the columns ``City-State``,
        ``PercentageMales``, ``MedianAge``, ``Latitude``, ``Longitude``,
        ``PercentageHighSchoolGrads``, ``PercentagePHDs``,
        ``PercentageBelowPovertyLevel``, ``Population``,
        ``PercentageNoReligion``, ``PercentageEvangelicals``, ``gays`` and
        ``lesbians``.

        If anything fails (request error or timeout, a missing page section,
        an unparsable number), "INFORMATION IS NOT AVAILABLE" is printed and
        a placeholder row is returned instead, with ``City-State`` set to the
        string "0" and every other column set to 0.
    """
    try:
        # A timeout keeps a stalled connection from blocking the caller forever;
        # a timeout error is handled like any other scraping failure below.
        page = requests.get(
            f"https://www.city-data.com/city/{town_name}.html",
            timeout=_REQUEST_TIMEOUT_SECONDS,
        ).text
        page2 = requests.get(
            f"https://www.city-data.com/poverty/poverty-{town_name}.html",
            timeout=_REQUEST_TIMEOUT_SECONDS,
        ).text
        doc = BeautifulSoup(page, "html.parser")
        doc2 = BeautifulSoup(page2, "html.parser")

        # Exactly two "(xx.x%)" figures are expected; the unpacking raises
        # (and triggers the fallback) otherwise. Only the first one is used.
        sex_population = str(doc.find(id="population-by-sex"))
        males, _females = [
            float(x)
            for x in re.findall(r"(?<=\()[0-9]+\.[0-9]+(?=\%\))", sex_population)
        ]

        age_population = str(doc.find(id="median-age"))
        medianage = float(
            re.search(
                r"Median resident age:.*\>([0-9]*\.[0-9]*).*median age",
                age_population,
            ).groups()[0]
        )

        coordinates = str(doc.find(id="coordinates"))
        latitude = float(re.findall(r"(?<=Latitude:</b> )[0-9]*.[0-9]*", coordinates)[0])
        longitude = float(re.findall(r"(?<=Longitude:</b> )[0-9]*.[0-9]*", coordinates)[0])

        education_level = str(doc.find(id="education-info"))
        highschoolgrads = float(
            re.findall(r"(?<=High school or higher:<\/b> )[0-9]*.[0-9]*", education_level)[0]
        )
        phds = float(
            re.findall(r"(?<=professional degree:<\/b> )[0-9]*.[0-9]*", education_level)[0]
        )

        # First decimal number found in the poverty page's "rt" element.
        poverty_level = str(doc2.find(id="rt"))
        below_poverty_level = float(re.findall(r"[0-9]*\.[0-9]*", poverty_level)[0])

        total_population = str(doc.find(id="city-population"))
        residents = float(
            re.findall(r"(?<=</b> )(?:[0-9]*\,*)*", total_population)[0].replace(",", "")
        )

        # Religion table: one [name, adherent count] pair per row that has <td>
        # cells. Thousands separators are dropped and "-" is read as zero.
        religion_population = doc.find(id="religion").find_all("tr")
        data = []
        for row in religion_population:
            columns = row.find_all("td")
            if columns:
                religion = columns[0].get_text(strip=True)
                number = columns[1].get_text(strip=True).replace(",", "").replace("-", "0")
                data.append([religion, int(number)])
        df = pd.DataFrame(data, columns=["religion", "number"])
        df["percentage"] = (df["number"] / df["number"].sum()) * 100
        atheist = df[df.religion == "None"].iloc[0]["percentage"]
        evangelicals = df[df.religion == "Evangelical Protestant"].iloc[0]["percentage"]

        homosexual_households = str(doc.find(id="households-stats"))
        # NOTE(review): unlike the "Gay men" pattern below, this one ends in
        # "[0-9]" rather than "[0-9]*", so it keeps a single digit after the
        # separator. Left unchanged because it affects the values fed to the
        # models - confirm whether that is intended.
        lesbians = float(
            re.findall(r"(?<=Lesbian couples:<\/b> )[0-9]*.[0-9]", homosexual_households)[0]
        )
        gays = float(re.findall(r"(?<=Gay men:<\/b> )[0-9]*.[0-9]*", homosexual_households)[0])

        rsd = pd.DataFrame(
            {
                "City-State": town_name,
                "PercentageMales": males,
                "MedianAge": medianage,
                "Latitude": latitude,
                "Longitude": longitude,
                "PercentageHighSchoolGrads": highschoolgrads,
                "PercentagePHDs": phds,
                "PercentageBelowPovertyLevel": below_poverty_level,
                "Population": residents,
                "PercentageNoReligion": atheist,
                "PercentageEvangelicals": evangelicals,
                "gays": gays,
                "lesbians": lesbians,
            },
            index=[0],
        )
        return rsd
    except Exception:
        # Deliberately broad: any scraping/parsing failure maps to the same
        # placeholder row. Unlike a bare "except:", this still lets
        # KeyboardInterrupt and SystemExit propagate.
        print("INFORMATION IS NOT AVAILABLE")
        return pd.DataFrame(
            {
                "City-State": "0",
                "PercentageMales": 0,
                "MedianAge": 0,
                "Latitude": 0,
                "Longitude": 0,
                "PercentageHighSchoolGrads": 0,
                "PercentagePHDs": 0,
                "PercentageBelowPovertyLevel": 0,
                "Population": 0,
                "PercentageNoReligion": 0,
                "PercentageEvangelicals": 0,
                "gays": 0,
                "lesbians": 0,
            },
            index=[0],
        )
def predict_city_score(clas_model, reg_model, input_data):
    """Score one city with a classifier first and a regressor as fallback.

    Args:
        clas_model: Object whose ``predict(input_data)`` result is compared to 1.
        reg_model: Object whose ``predict(input_data)`` result is indexed at 0.
        input_data: Feature data handed unchanged to both models.

    Returns:
        100 when the classifier's prediction equals 1; otherwise the first
        element of the regressor's prediction.
    """
    predicted_perfect = clas_model.predict(input_data) == 1
    if not predicted_perfect:
        return reg_model.predict(input_data)[0]
    return 100
# Table of cities with a "City-State" label and a "Final-Score" value.
cityfinalscores = pd.read_csv("cities.csv")

# Classifier and regressor handed to predict_city_score.
# joblib.load unpickles arbitrary Python objects, so these files must only
# ever come from a trusted source.
clas_model, reg_model = (
    joblib.load(model_path)
    for model_path in ("classifier_model.sav", "regressor_model.sav")
)

# 1. Ask the user for an input
# (console prompt left disabled)
# city_input = input("Enter the name of the city and state [city]-[state]: ")
def Finalresult(Pacific, Mountain, Central, Eastern, Other):
    """Look up, or predict, the score for the city chosen in the dropdowns.

    The five dropdown values are concatenated into one city label, so the
    result is only meaningful when a single dropdown holds a city.

    Args:
        Pacific, Mountain, Central, Eastern, Other: Selected "city-state"
            label from each time-zone dropdown, or an empty string / ``None``
            when nothing is selected there.

    Returns:
        A ``(message, score)`` tuple when the city has an entry in
        ``cityfinalscores`` or its data could be scraped and scored.
        Otherwise the string
        "NO INFORMATION AVAILABLE, PLEASE TRY WITH A DIFFERENT CITY".
    """
    # NOTE(review): the success paths return a tuple while the failure path
    # returns a plain string; confirm the tuple renders as intended in the
    # single output component.
    selections = (Pacific, Mountain, Central, Eastern, Other)
    # An unset dropdown may arrive as None rather than ""; treat both as empty.
    city_input = "".join(choice or "" for choice in selections)

    # 1. Prefer the precomputed score; this needs no network access.
    known_scores = cityfinalscores.loc[
        cityfinalscores["City-State"] == city_input, "Final-Score"
    ]
    if not known_scores.empty:
        return ("the city " + city_input + " has a score of: ", known_scores.item())

    # Nothing selected: there is no page to scrape, so skip the requests.
    if not city_input:
        return "NO INFORMATION AVAILABLE, PLEASE TRY WITH A DIFFERENT CITY"

    # 2. Scrape the data for that city (only when no precomputed score exists).
    data = scrape_city_data(city_input)
    # scrape_city_data signals failure with a placeholder row labelled "0".
    if data["City-State"].item() == "0":
        return "NO INFORMATION AVAILABLE, PLEASE TRY WITH A DIFFERENT CITY"

    # 3. Predict from every column except the leading "City-State" label.
    predicted_score = predict_city_score(
        clas_model, reg_model, input_data=data.iloc[:, 1:]
    )
    return ("the city " + city_input + " has a score of: ", predicted_score)
def _cities_in_zones(*zones):
    """Return the unique "city-state" labels whose "Time Zone" is in *zones*."""
    in_zone = uscities["Time Zone"].isin(list(zones))
    return uscities.loc[in_zone, "city-state"].drop_duplicates().to_list()


# Dropdown choices, one list per time-zone group.
pst = _cities_in_zones("PST")
mst = _cities_in_zones("MST")
cst = _cities_in_zones("CST")
est = _cities_in_zones("EST")
other = _cities_in_zones("AKST", "HST")

# gr.Textbox replaces the legacy gr.outputs.Textbox namespace, matching the
# top-level gr.Dropdown components used for the inputs.
outputs = gr.Textbox()
Pacific = gr.Dropdown(choices=pst, value="")
Mountain = gr.Dropdown(choices=mst, value="")
Central = gr.Dropdown(choices=cst, value="")
Eastern = gr.Dropdown(choices=est, value="")
Other = gr.Dropdown(choices=other, value="")

# Input order must match the parameter order of Finalresult.
app = gr.Interface(
    fn=Finalresult,
    inputs=[Pacific, Mountain, Central, Eastern, Other],
    outputs=outputs,
    description="From the drop down list select the city and state you're interested in to find out its projected LGBTQIA2+ Equality Index Score. Scores range from 0 to 100, a perfect score.",
)
app.launch()