| | import joblib |
| | import xgboost |
| | import pandas as pd |
| | from bs4 import BeautifulSoup |
| | import numpy as np |
| | import requests |
| | from time import sleep |
| | from random import randint |
| | import re |
| | from lxml import etree |
| | from urllib.request import urlopen |
| | import gradio as gr |
| |
|
# Table of US cities read from disk; the code below uses its "name", "state"
# and "Time Zone" columns.
uscities = pd.read_csv("final_cities_list.csv")

# Combined "<name>-<state>" key for each row.
uscities["city-state"] = uscities["name"] + "-" + uscities["state"]
# Seconds to wait on each city-data.com request before treating it as failed.
_REQUEST_TIMEOUT_SECONDS = 30

# Numeric feature columns, in the order the result frame exposes them
# (they follow the leading "City-State" label column).
_FEATURE_COLUMNS = (
    "PercentageMales",
    "MedianAge",
    "Latitude",
    "Longitude",
    "PercentageHighSchoolGrads",
    "PercentagePHDs",
    "PercentageBelowPovertyLevel",
    "Population",
    "PercentageNoReligion",
    "PercentageEvangelicals",
    "gays",
    "lesbians",
)


def _empty_city_frame():
    """Return the all-zero sentinel row used when a city cannot be scraped."""
    return pd.DataFrame(
        {"City-State": "0", **{column: 0 for column in _FEATURE_COLUMNS}},
        index=[0],
    )


def _first_float(pattern, text):
    """Return the first match of *pattern* in *text* converted to float."""
    return float(re.findall(pattern, text)[0])


def scrape_city_data(town_name):
    """Scrape demographic features for one city from city-data.com.

    Two pages are fetched: the city page and the matching poverty page. The
    values are pulled out of specific page elements (looked up by element id)
    with regular expressions.

    Args:
        town_name: Identifier interpolated into both page URLs. The caller in
            this file passes the "<name>-<state>" key -- TODO confirm this is
            the form city-data.com expects for every city.

    Returns:
        A single-row ``pandas.DataFrame`` whose first column is "City-State"
        (set to ``town_name``) followed by the twelve numeric feature columns.
        If anything goes wrong (network error, missing page element, value
        that cannot be parsed), a message is printed and a sentinel row is
        returned instead, with "City-State" equal to the string "0" and every
        feature equal to 0.
    """
    try:
        # A timeout keeps a stalled server from blocking the caller forever;
        # a timeout error is handled like any other scrape failure below.
        page = requests.get(
            f"https://www.city-data.com/city/{town_name}.html",
            timeout=_REQUEST_TIMEOUT_SECONDS,
        ).text
        page2 = requests.get(
            f"https://www.city-data.com/poverty/poverty-{town_name}.html",
            timeout=_REQUEST_TIMEOUT_SECONDS,
        ).text
        doc = BeautifulSoup(page, "html.parser")
        doc2 = BeautifulSoup(page2, "html.parser")

        # Percentages printed in parentheses, e.g. "(49.1%)". Exactly two
        # values are expected; any other count fails the unpacking and is
        # handled as a scrape failure. Only the first one is used.
        sex_population = str(doc.find(id="population-by-sex"))
        males, _females = [
            float(x)
            for x in re.findall(r"(?<=\()[0-9]+\.[0-9]+(?=\%\))", sex_population)
        ]

        age_population = str(doc.find(id="median-age"))
        medianage = float(
            re.search(
                r"Median resident age:.*>([0-9]*\.[0-9]*).*median age",
                age_population,
            ).group(1)
        )

        # Only the numeric magnitude is captured; no sign is applied.
        coordinates = str(doc.find(id="coordinates"))
        latitude = _first_float(r"(?<=Latitude:</b> )[0-9]*.[0-9]*", coordinates)
        longitude = _first_float(r"(?<=Longitude:</b> )[0-9]*.[0-9]*", coordinates)

        education_level = str(doc.find(id="education-info"))
        highschoolgrads = _first_float(
            r"(?<=High school or higher:</b> )[0-9]*.[0-9]*", education_level
        )
        phds = _first_float(
            r"(?<=professional degree:</b> )[0-9]*.[0-9]*", education_level
        )

        # First decimal number found inside the element with id "rt" on the
        # poverty page.
        poverty_level = str(doc2.find(id="rt"))
        below_poverty_level = _first_float(r"[0-9]*\.[0-9]*", poverty_level)

        # Population is printed with thousands separators; strip them.
        total_population = str(doc.find(id="city-population"))
        residents = float(
            re.findall(r"(?<=</b> )[0-9,]*", total_population)[0].replace(",", "")
        )

        # Religion table: one row per group with a count in the second cell.
        # A "-" in the count is turned into "0" before conversion.
        data = []
        for row in doc.find(id="religion").find_all("tr"):
            columns = row.find_all("td")
            if columns:
                religion = columns[0].get_text(strip=True)
                number = (
                    columns[1]
                    .get_text(strip=True)
                    .replace(",", "")
                    .replace("-", "0")
                )
                data.append([religion, int(number)])
        df = pd.DataFrame(data, columns=["religion", "number"])
        # Share of each group relative to the sum of all counts in the table.
        df["percentage"] = (df["number"] / df["number"].sum()) * 100
        atheist = df[df.religion == "None"].iloc[0]["percentage"]
        evangelicals = df[df.religion == "Evangelical Protestant"].iloc[0]["percentage"]

        # Both household figures use the same number pattern so that neither
        # is truncated after the first decimal digit.
        homosexual_households = str(doc.find(id="households-stats"))
        lesbians = _first_float(
            r"(?<=Lesbian couples:</b> )[0-9]*.[0-9]*", homosexual_households
        )
        gays = _first_float(r"(?<=Gay men:</b> )[0-9]*.[0-9]*", homosexual_households)

        return pd.DataFrame(
            {
                "City-State": town_name,
                "PercentageMales": males,
                "MedianAge": medianage,
                "Latitude": latitude,
                "Longitude": longitude,
                "PercentageHighSchoolGrads": highschoolgrads,
                "PercentagePHDs": phds,
                "PercentageBelowPovertyLevel": below_poverty_level,
                "Population": residents,
                "PercentageNoReligion": atheist,
                "PercentageEvangelicals": evangelicals,
                "gays": gays,
                "lesbians": lesbians,
            },
            index=[0],
        )
    except Exception:
        # Deliberately broad: any fetch or parse problem means "no data" for
        # this city. Unlike a bare ``except``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        print("INFORMATION IS NOT AVAILABLE")
        return _empty_city_frame()
| |
|
| |
|
def predict_city_score(clas_model, reg_model, input_data):
    """Return the score for one city from a classifier/regressor pair.

    The classifier is consulted first: when it predicts class 1 the score is
    fixed at 100, otherwise the regressor's prediction is returned.

    Args:
        clas_model: Object with a ``predict(input_data)`` method returning an
            indexable sequence of class labels.
        reg_model: Object with a ``predict(input_data)`` method returning an
            indexable sequence of numeric predictions.
        input_data: Feature rows passed unchanged to both models. Only the
            prediction for the first row is used.

    Returns:
        ``100`` if the first classifier prediction equals 1, otherwise the
        first regressor prediction.
    """
    class_result = clas_model.predict(input_data)

    # Index the first prediction explicitly (as the regression branch does)
    # rather than relying on the truth value of an array comparison, which
    # raises for inputs with more than one row.
    if class_result[0] == 1:
        return 100
    return reg_model.predict(input_data)[0]
| |
|
| |
|
# Table of already-known city scores; looked up by its "City-State" column
# and read from its "Final-Score" column.
cityfinalscores = pd.read_csv("cities.csv")

# Serialized models loaded from disk.
# NOTE: joblib.load unpickles arbitrary Python objects, so these files must
# come from a trusted source.
clas_model = joblib.load('classifier_model.sav')
reg_model = joblib.load('regressor_model.sav')
| |
|
| | |
| | |
| |
|
def Finalresult(Pacific, Mountain, Central, Eastern, Other):
    """Return the score message for the city selected in the dropdowns.

    The five arguments are the values of the per-time-zone dropdowns. They
    are concatenated into one city key, so the key is only meaningful when a
    single dropdown carries a value and the others are empty.

    The precomputed score table is checked first; only when the city is not
    listed there is the city scraped and scored with the models.

    Args:
        Pacific, Mountain, Central, Eastern, Other: Selected "city-state"
            string from each dropdown, or an empty value when unused.

    Returns:
        A ``(message_prefix, score)`` tuple when a score is available, or a
        plain "no information" string when nothing was selected or the city
        could not be scraped.
    """
    no_information = "NO INFORMATION AVAILABLE, PLEASE TRY WITH A DIFFERENT CITY"

    # An unset dropdown may arrive as None rather than "" -- treat both as
    # empty so the concatenation cannot fail on a None value.
    selections = (Pacific, Mountain, Central, Eastern, Other)
    city_input = "".join(choice or "" for choice in selections)

    # Nothing selected: there is no page to scrape, so answer immediately.
    if not city_input:
        return no_information

    # Known city: answer from the table without touching the network.
    known_rows = cityfinalscores["City-State"] == city_input
    if known_rows.any():
        return (
            "the city " + city_input + " has a score of: ",
            cityfinalscores[known_rows]["Final-Score"].item(),
        )

    # Unknown city: scrape its features; a "0" label marks a failed scrape.
    data = scrape_city_data(city_input)
    if data["City-State"].item() == "0":
        return no_information

    # The first column is the label; the remaining columns are the features.
    predicted_score = predict_city_score(
        clas_model, reg_model, input_data=data.iloc[:, 1:]
    )
    return ("the city " + city_input + " has a score of: ", predicted_score)
| |
|
| |
|
def _city_states_in(*zones):
    """Return the unique "city-state" keys whose "Time Zone" is in *zones*."""
    in_zone = uscities["Time Zone"].isin(zones)
    return uscities.loc[in_zone, "city-state"].drop_duplicates().to_list()


# One list of city keys per dropdown; Alaska and Hawaii time share a list.
pst = _city_states_in("PST")
mst = _city_states_in("MST")
cst = _city_states_in("CST")
est = _city_states_in("EST")
other = _city_states_in("AKST", "HST")
| |
|
# Output component. The top-level ``gr.Textbox`` is used instead of the
# legacy ``gr.outputs`` namespace, matching the top-level ``gr.Dropdown``
# components used for the inputs.
outputs = gr.Textbox()

# One dropdown per time-zone group, each starting with an empty selection.
Pacific = gr.Dropdown(choices=pst, value="")
Mountain = gr.Dropdown(choices=mst, value="")
Central = gr.Dropdown(choices=cst, value="")
Eastern = gr.Dropdown(choices=est, value="")
Other = gr.Dropdown(choices=other, value="")

# The dropdown values are passed to Finalresult in this order.
app = gr.Interface(
    fn=Finalresult,
    inputs=[Pacific, Mountain, Central, Eastern, Other],
    outputs=outputs,
    description=(
        "From the drop down list select the city and state you're interested in "
        "to find out its projected LGBTQIA2+ Equality Index Score. "
        "Scores range from 0 to 100, a perfect score."
    ),
)
app.launch()
| |
|
| |
|