Spaces:

GlassWalker
/

GayFriendlyTownFinder

Running

App Files Files Community

GayFriendlyTownFinder / app.py

GlassWalker

Update app.py

b6f6d95 about 3 years ago

raw

history blame contribute delete

6.04 kB

	import joblib
	import xgboost
	import pandas as pd
	from bs4 import BeautifulSoup
	import numpy as np
	import requests
	from time import sleep
	from random import randint
	import re
	from lxml import etree
	from urllib.request import urlopen
	import gradio as gr

	# Table of US cities. The extra "city-state" column joins the city name and
	# its state with a hyphen; the dropdown choices further down are built from it.
	uscities = pd.read_csv("final_cities_list.csv")
	uscities["city-state"] = uscities["name"] + "-" + uscities["state"]
	def _first_float(pattern, text):
	    """Return the first match of *pattern* in *text* as a float.

	    Thousands separators are removed before conversion. Raises IndexError
	    when nothing matches and ValueError when the match is not numeric.
	    """
	    return float(re.findall(pattern, text)[0].replace(",", ""))


	def scrape_city_data(town_name):
	    """Scrape demographic features for one city from city-data.com.

	    Args:
	        town_name: Page slug in ``<city>-<state>`` form; it is interpolated
	            into the city and poverty page URLs.

	    Returns:
	        A one-row DataFrame with the columns ``City-State``,
	        ``PercentageMales``, ``MedianAge``, ``Latitude``, ``Longitude``,
	        ``PercentageHighSchoolGrads``, ``PercentagePHDs``,
	        ``PercentageBelowPovertyLevel``, ``Population``,
	        ``PercentageNoReligion``, ``PercentageEvangelicals``, ``gays`` and
	        ``lesbians``. If anything goes wrong (network error, missing page
	        section, unparsable number) the same columns are returned filled
	        with zeros and ``City-State`` set to the string ``"0"``, which
	        callers use as the "no data" sentinel.
	    """
	    # An integer or decimal number of any length. No capturing group, so
	    # re.findall returns the whole match.
	    number = r"[0-9]+(?:\.[0-9]+)?"

	    try:
	        # Timeouts keep a stalled connection from hanging the request forever.
	        page = requests.get(f"https://www.city-data.com/city/{town_name}.html", timeout=30).text
	        page2 = requests.get(f"https://www.city-data.com/poverty/poverty-{town_name}.html", timeout=30).text
	        doc = BeautifulSoup(page, "html.parser")
	        doc2 = BeautifulSoup(page2, "html.parser")

	        # Percentages appear in parentheses, e.g. "(49.3%)"; males come first.
	        sex_population = str(doc.find(id="population-by-sex"))
	        males, _females = [float(x) for x in re.findall(r"(?<=\()[0-9]+\.[0-9]+(?=%\))", sex_population)]

	        age_population = str(doc.find(id="median-age"))
	        medianage = float(
	            re.search(r"Median resident age:.*?>(" + number + r").*?median age", age_population).group(1)
	        )

	        coordinates = str(doc.find(id="coordinates"))
	        latitude = _first_float(r"(?<=Latitude:</b> )" + number, coordinates)
	        longitude = _first_float(r"(?<=Longitude:</b> )" + number, coordinates)

	        education_level = str(doc.find(id="education-info"))
	        highschoolgrads = _first_float(r"(?<=High school or higher:</b> )" + number, education_level)
	        phds = _first_float(r"(?<=professional degree:</b> )" + number, education_level)

	        # First decimal number inside the "rt" element of the poverty page.
	        poverty_level = str(doc2.find(id="rt"))
	        below_poverty_level = _first_float(r"[0-9]+\.[0-9]+", poverty_level)

	        # Population is written with thousands separators, e.g. "1,234,567".
	        total_population = str(doc.find(id="city-population"))
	        residents = _first_float(r"(?<=</b> )[0-9][0-9,]*", total_population)

	        # Religion table: one row per denomination with an adherent count.
	        data = []
	        for row in doc.find(id="religion").find_all("tr"):
	            columns = row.find_all("td")
	            if columns:
	                religion = columns[0].get_text(strip=True)
	                # A "-" cell stands for no adherents and is read as 0.
	                number_text = columns[1].get_text(strip=True).replace(",", "").replace("-", "0")
	                data.append([religion, int(number_text)])
	        df = pd.DataFrame(data, columns=["religion", "number"])
	        df["percentage"] = (df["number"] / df["number"].sum()) * 100
	        atheist = df[df.religion == "None"].iloc[0]["percentage"]
	        evangelicals = df[df.religion == "Evangelical Protestant"].iloc[0]["percentage"]

	        homosexual_households = str(doc.find(id="households-stats"))
	        lesbians = _first_float(r"(?<=Lesbian couples:</b> )" + number, homosexual_households)
	        gays = _first_float(r"(?<=Gay men:</b> )" + number, homosexual_households)

	        return pd.DataFrame(
	            {
	                "City-State": town_name,
	                "PercentageMales": males,
	                "MedianAge": medianage,
	                "Latitude": latitude,
	                "Longitude": longitude,
	                "PercentageHighSchoolGrads": highschoolgrads,
	                "PercentagePHDs": phds,
	                "PercentageBelowPovertyLevel": below_poverty_level,
	                "Population": residents,
	                "PercentageNoReligion": atheist,
	                "PercentageEvangelicals": evangelicals,
	                "gays": gays,
	                "lesbians": lesbians,
	            },
	            index=[0],
	        )
	    except Exception:
	        # Deliberate best-effort: any scraping or parsing failure collapses to
	        # the zero-filled sentinel row. Unlike a bare "except:", this lets
	        # KeyboardInterrupt and SystemExit propagate.
	        print("INFORMATION IS NOT AVAILABLE")
	        return pd.DataFrame(
	            {
	                "City-State": "0",
	                "PercentageMales": 0,
	                "MedianAge": 0,
	                "Latitude": 0,
	                "Longitude": 0,
	                "PercentageHighSchoolGrads": 0,
	                "PercentagePHDs": 0,
	                "PercentageBelowPovertyLevel": 0,
	                "Population": 0,
	                "PercentageNoReligion": 0,
	                "PercentageEvangelicals": 0,
	                "gays": 0,
	                "lesbians": 0,
	            },
	            index=[0],
	        )


	def predict_city_score(clas_model, reg_model, input_data):
	    """Score one city using the classifier first, then the regressor.

	    Args:
	        clas_model: Fitted model whose ``predict`` output equals 1 when the
	            city should receive the maximum score.
	        reg_model: Fitted model whose ``predict`` supplies the score in
	            every other case.
	        input_data: Feature rows handed unchanged to both models.

	    Returns:
	        ``100`` when the classifier predicts 1, otherwise the first value
	        predicted by the regressor.
	    """
	    is_perfect = clas_model.predict(input_data) == 1
	    if is_perfect:
	        return 100
	    return reg_model.predict(input_data)[0]


	# Table of already-known scores; Finalresult looks cities up in it by the
	# "City-State" column and reads the "Final-Score" column.
	cityfinalscores = pd.read_csv("cities.csv")

	# Persisted classifier and regressor consumed by predict_city_score.
	# NOTE(review): joblib.load unpickles the file, so these artifacts must
	# only ever come from a trusted source.
	clas_model = joblib.load("classifier_model.sav")
	reg_model = joblib.load("regressor_model.sav")

	# # 1. Ask the user for an input
	# city_input=input("Enter the name of the city and state [city]-[state]: ")

	def Finalresult(Pacific, Mountain, Central, Eastern, Other):
	    """Return the score for the city selected in the time-zone dropdowns.

	    The five arguments are the current dropdown values. They are
	    concatenated, so the result is a usable "city-state" label only when
	    exactly one dropdown holds a selection.

	    Returns:
	        A ``(message, score)`` tuple when the score is found in
	        ``cityfinalscores`` or predicted by the models, or a plain string
	        asking for a different city when scraping yields no data.
	    """
	    # A cleared dropdown may arrive as None; treat it like the empty default
	    # instead of failing on the string concatenation.
	    selections = (Pacific, Mountain, Central, Eastern, Other)
	    city_input = "".join(choice or "" for choice in selections)

	    known = cityfinalscores.loc[cityfinalscores["City-State"] == city_input, "Final-Score"]
	    if not known.empty:
	        # iloc[:1] keeps .item() valid even if the CSV lists the city twice.
	        return ("the city " + city_input + " has a score of: ", known.iloc[:1].item())

	    # Scrape only when no precomputed score exists: each scrape costs two
	    # HTTP requests, and its result was previously discarded for known cities.
	    data = scrape_city_data(city_input)
	    if data["City-State"].item() == "0":
	        return "NO INFORMATION AVAILABLE, PLEASE TRY WITH A DIFFERENT CITY"

	    # The first column is the city label; the remaining ones are the features.
	    predicted_score = predict_city_score(clas_model, reg_model, input_data=data.iloc[:, 1:])
	    return ("the city " + city_input + " has a score of: ", predicted_score)


	def _city_choices(*time_zones):
	    """Return the unique "city-state" labels whose time zone is in *time_zones*."""
	    in_zone = uscities["Time Zone"].isin(list(time_zones))
	    return uscities.loc[in_zone, "city-state"].drop_duplicates().to_list()


	# Dropdown choices grouped by time zone; Alaska (AKST) and Hawaii (HST)
	# share the "Other" list.
	pst = _city_choices("PST")
	mst = _city_choices("MST")
	cst = _city_choices("CST")
	est = _city_choices("EST")
	other = _city_choices("AKST", "HST")

	# gr.Textbox replaces the deprecated gr.outputs.Textbox and matches the
	# top-level gr.Dropdown components used for the inputs.
	outputs = gr.Textbox()
	Pacific = gr.Dropdown(choices=pst, value="")
	Mountain = gr.Dropdown(choices=mst, value="")
	Central = gr.Dropdown(choices=cst, value="")
	Eastern = gr.Dropdown(choices=est, value="")
	Other = gr.Dropdown(choices=other, value="")
	app = gr.Interface(
	    fn=Finalresult,
	    inputs=[Pacific, Mountain, Central, Eastern, Other],
	    outputs=outputs,
	    description="From the drop down list select the city and state you're interested in to find out its projected LGBTQIA2+ Equality Index Score. Scores range from 0 to 100, a perfect score.",
	)
	app.launch()