Spaces:

klgold
/

ds110probs

Running

Kevin Gold

Lecture Python examples added

daebe13 over 1 year ago

73.4 kB


	# Lecture2HelloWorldAndExpressions.py
	print('Hello, world!')
	print('Hello 1')
	print('Hello 2')
	print('Hello 3')
	print('Hello, world!')
	print(Hello, world!)
	print(Hello, world!) # Intentionally creates an error!
	print(1) # Technically an expression
	print(1+2) # Two operands and an operator make an expression
	print(10*(10+1)) # The expression (10+1) acting as an operand
	print(3 + 8 / 2) # What do you predict?
	print(4 * 2 + 3 + 5 * 2) # And this one?
	print('Hello', 'world', '!')
	print(max(2,5,7))
	print(max(2,7) + max(3,9)) # Using function calls as operands
	print(max(2,7) + max(3,9)) # Calc 7, calc 9, then add
	1
	2
	3
	max(2,7)
	None
	print(2) + 2
	print('Hello, world!')
	max(2 8, 3 6, 5 ** 3)
	1.0000000000000001 - 1
	print(type(-100)) # int
	print(type(10.1)) # float
	print(type('A')) # str
	print(type(True)) # bool
	print(type('10')) # str
	print(type(10)) # int
	print(type(10.0)) # float
	print(type(True)) # bool
	0.1 + 0.1 + 0.1
	'Hello ' + 1111
	'Hello ' + 'world' + '!'
	'Hello ' + str(1111)
	20 * 9/5 + 32
	print('Temp: 68.0 F')
	print('Temp: ' + 20 * 9/5 + 32 + ' F')
	print('Temp: ' + str(20 * 9/5 + 32) + ' F')
	# Lecture3VariablesAndConditions.py
	two_to_the_eighth = 2 ** 8
	print(two_to_the_eighth)
	two_to_the_eighth * 2
	pay_per_hour = 18
	pay_per_hour = 20 # Pay raise!
	print(pay_per_hour)
	counter = 0
	counter = counter + 1 # It's an instruction, not an equality!
	print(counter)
	counter = counter + 1
	print(counter)
	pay_per_hour = 20
	hours = 40
	total_pay = pay_per_hour * hours
	print(total_pay)
	Pay_Per_Hour = 15 # please avoid this capitalization style!
	print(pay_per_hour) # remembers the lowercase value
	silent_assignment = 0
	20 = pay_per_hour
	print(undefined_var + 7)
	color = input('What is your favorite color? ')
	print('Yeah, ' + color + ' is pretty great!')
	to_square_str = input('What should I square? ')
	print(int(to_square_str) ** 2)
	city = input('What city are we in? ')
	print(city == 'Boston')

	answer = input('What is 2+2? ')
	print(answer == 4) # not going to work
	answer == '4' # but this works
	int(answer) == 4 # or this
	float(answer) == 4 # or even this
	print(1 < 1)
	print(1 > 1)
	print(1 != 1)
	print(1 <= 1)
	print(1 >= 1)
	print('aardvark' < 'zebra')
	print('capitalized' == 'Capitalized')
	2 + 5 > 7 - 4 # 5 > 7 would be false, but (2+5) > (7-4) is True
	total = 0
	value_str = input('Enter a value: ')
	value_int = int(value_str)
	if value_int < 0:
	print('Sorry, that was a negative value.')
	else:
	total = total + value_int
	print(total)
	if condition:
	statement_if_true1
	statement_if_true2
	statement_if_true3
	...
	else:
	statement_if_false1
	statement_if_false2
	...
	statement_regardless1
	statement_regardless2
	...
	value = int(input('Enter an integer:'))
	if value < 0:
	print('Negative')
	else:
	print('Positive')
	print('Done')
	password = input('Enter the password: ')
	if password == '1234':
	print('Correct!')
	print('Your account has $1000000 in it.')
	else:
	print('Incorrect.')
	print('Have a nice day.')
	num1_str = input('Enter an integer: ')
	num2_str = input('Enter a different integer: ')
	num1_int = int(num1_str)
	num2_int = int(num2_str)
	if num1_str == num2_str:
	print('The numbers were supposed to be different...')
	print('But you entered ' + num1_str + ' twice!')
	else:
	print(num2_str + ' divided by ' + num1_str + ' is...')
	print(num2_int / num1_int) # Divide by zero would be error, btw
	print('Done...')
	language = input('What is your favorite language? ')
	if language == 'Python':
	print('Mine too!')
	print('But there sure are a lot of languages out there....')
	value = int(input('Enter an integer between 0 and 100: '))
	if value < 0:
	print('No negative numbers!')
	elif value > 100:
	print('That value is too large!')
	elif value == 42:
	print('That was the number I was thinking of!')
	else:
	print('Guess again.')

	value = int(input('Enter an integer between 0 and 100: '))
	if value < 0:
	print('No negative numbers!')
	elif value > 100:
	print('That value is too large!')
	elif value >= 50:
	print('Big!')
	else:
	print('Small!')
	value = int(input('Enter an integer between 0 and 100: '))
	if value < 0:
	print('No negative numbers!')
	else:
	if value > 100:
	print('That value is too large!')
	else:
	if value >= 50:
	print('Big!')
	else:
	print('Small!')
	age = int(input('Enter your age: '))
	if age < 18:
	if age < 5:
	print('Just a toddler, then.')
	elif age < 12:
	print('Not quite a teenager, then.')
	else:
	print('Teenage years ... a difficult time!')
	else:
	print('An adult, then.')
	if age >= 55:
	print('And a senior citizen, too!')
	num1 = int(input('First number: '))
	num2 = int(input('Second number: '))
	num3 = int(input('Third number: '))
	my_max = max(num1, num2, num3)
	my_min = min(num1, num2, num3)
	my_mean = (num1+num2+num3)/3 # Note importance of parens!
	print('Min: ' + str(my_min))
	print('Max: ' + str(my_max))
	print('Mean: ' + str(my_mean))
	if num1 == num2:
	print(str(num1) + ' was repeated')
	elif num2 == num3:
	print(str(num2) + ' was repeated')
	elif num1 == num3:
	print(str(num3) + ' was repeated')
	else:
	print('The numbers were unique')
	# Lecture4WhileAndLists.py
	string = input('Enter a number: ')
	while string != 'stop':
	print(string + ' squared is ' + str(int(string) ** 2))
	string = input('Enter a number: ')
	print('Done.')
	counter = 0
	while counter < 21:
	print(counter)
	counter = counter + 1
	print(counter)
	counter = 1
	print('We will now iterate three times...')
	while counter < 4:
	print('Iteration ' + str(counter))
	counter = counter + 1
	total = 0
	count = 0
	value_str = input('Enter a number, or "done" if done: ')
	while value_str != 'done':
	count = count + 1
	value_int = int(value_str)
	total = total + value_int
	value_str = input('Enter a number, or "done" if done: ')
	if count > 0:
	print('The average is ' + str(total/count))
	total = 0
	count = 0
	value_str = input('Enter a number, or "done" if done: ')
	while value_str != 'done':
	count += 1
	value_int = int(value_str)
	total += value_int
	value_str = input('Enter a number, or "done" if done: ')
	if count > 0:
	print('The average is ' + str(total/count))
	while(True):
	input('Enter any input to get a compliment: ')
	print('That is so clever of you!')
	my_list = ['duck', 'duck', 'goose'] # A list with 3 items
	print(my_list[0])
	print(my_list[1])
	print(my_list[2])
	my_list = ['duck', 'duck', 'goose']
	my_list[2] = 'bear'
	print(my_list)
	my_list = [1, 2, 3]
	my_list.append(4)
	print(my_list) # my_list has changed...
	print(my_list.append(5))
	print(my_list)
	shopping_list = []
	item = input('Add an item to the shopping list (or "done"): ')
	while item.lower() != 'done':
	shopping_list.append(item)
	item = input('Add an item to the shopping list (or "done"): ')
	print('Okay, so that was: ')
	print(shopping_list)
	[1, 2, 3] + [4, 5, 6]
	print(len('Hello'))
	print(len([1, 2, 3]))
	my_items = ['eggs', 'flour', 'milk']
	print(len(my_items), 'items')
	print(my_items[2])
	print(my_items[len(my_items)-1])
	planet_diameter_km = [4879, 12104, 12756, 6792, 142984, 120536, 51118, 49528, 2377]
	planet_diameter_km.sort()
	planet_diameter_km
	my_list1 = [3, 2, 1]
	my_list2 = my_list1
	my_list1.sort()
	print(my_list1)
	print(my_list2)
	my_list1 = [3, 2, 1]
	my_list2 = my_list1.copy()
	my_list1.sort()
	print(my_list1)
	print(my_list2)
	honors = ['Albert', 'Berenice', 'Chen', 'Dominique']
	mentioned_honors = []
	nonhonors = []
	student = input('Enter a name (or "done"): ')
	while (student != 'done'):
	if student in honors:
	print('Honors!')
	mentioned_honors.append(student)
	else:
	print('Not honors...')
	nonhonors.append(student)
	student = input('Enter a name (or "done"): ')
	print('Honors mentioned: ' + str(mentioned_honors))
	print('Nonhonors mentioned: ' + str(nonhonors))
	# Lecture5MorePower.py
	percent = input('Enter a percentage between 0 and 100:')
	if float(percent) >= 0 and float(percent) <= 100:
	if float(percent) >= 10:
	print('A decent return on investment....')
	else:
	print('Not a great return on investment....')
	else:
	print('That is not in the requested range!')
	vip = False
	spent = 10
	if vip or spent >= 10000:
	print('Send this person a loyalty reward!')
	else:
	print('This person deserves nothing!')
	vip = False
	if not vip:
	print('Have you considered signing up to join the VIP program?')
	else:
	print('Welcome back, VIP customer!')
	vip = False
	spent = 0
	if not vip or spent < 10000: # "not" applied to vip before "or"
	print('Please spend more')
	else:
	print('Hello, valued patron!')
	vip = False
	spent = 0
	if not (vip or spent < 10000): # within parens evaluates to True
	print('Please spend more')
	else:
	print('Hello, valued patron!')
	my_list = [1,2,3]
	my_list2 = [7,8,9]
	if not 4 in my_list and not 4 in my_list2:
	print('No 4 found')
	my_list = [1,2,3]
	my_list2 = [7,8,9]
	if 4 not in my_list and not in my_list2:
	print('This will actually cause an error - not how "in" works')
	import math
	math.sqrt(2)
	import math as m
	m.sqrt(2)
	from math import sqrt as my_sqrt
	my_sqrt(2)
	get_ipython().system('python3 -m ensurepip --upgrade')
	get_ipython().system('pip install seaborn')
	import seaborn as sns
	df = sns.load_dataset("penguins") # Load a dataset about penguins
	sns.jointplot(data=df, x="flipper_length_mm", y="bill_length_mm", hue="species")
	import statistics
	statistics.median([1, 2, 3, 4])
	import statistics
	statistics.median([1, 2, 3, 4])
	total = 0
	count = 0
	value_str = input('Enter a number, or "done" if done: ')
	while value_str != 'done':
	count = count + 1
	value_int = int(value_str)
	total = total + value_int
	value_str = input('Enter a number, or "done" if done: ')
	if count > 0:
	print('The average is ' + str(total/count))
	total = 0
	count = 0
	value_str = input('Enter a non-negative integer, or "done" if done: ')
	while value_str != 'done':
	if not value_str.isdigit():
	print('Non-negative integers only!')
	else:
	count = count + 1
	value_int = int(value_str)
	total = total + value_int
	value_str = input('Enter a non-negative integer, or "done" if done: ')
	if count > 0:
	print('The average is ' + str(total/count))
	total = 0
	count = 0
	value_str = input('Enter a number, or "done" if done: ')
	while value_str != 'done':
	count = count + 1
	value_int = int(value_str)
	total = total + value_int
	print(value_str)
	if count > 0:
	print('The average is ' + str(total/count))
	3 = my_list
	total = 0
	count = 0
	value_str = input('Enter a number, or "done" if done: ')
	count = count + 1
	value_int = int(value_str)
	total = total + value_int
	if count > 0:
	print('The average is ' + str(total/count))
	# Lecture6and7Iteration.py
	people = ['Alice', 'Bob', 'Che']
	index = 0
	while index < len(people):
	person = people[index]
	print('Hooray for ' + person + '!')
	index += 1
	people = ['Alice', 'Bob', 'Che']
	for person in people:
	print('Hooray for ' + person + '!')
	running_total = 0
	numbers = [1,2,3,4,10]
	for n in numbers:
	running_total = running_total + n # Could be abbreviated running_total += n
	print('Sum so far: ' + str(running_total))
	print('Sum: ' + str(running_total))
	my_grades = [4, 3, 2, 3, 4]
	letter_grades = []
	for g in my_grades:
	if g == 4:
	letter_grades.append('A')
	elif g == 3:
	letter_grades.append('B')
	elif g == 2:
	letter_grades.append('C')
	print(letter_grades)
	temps_f = [36, 39, 45, 56, 66, 76, 81, 80, 72, 61, 51, 41] # Jan through Dec
	temps_c = []
	for t in temps_f:
	degrees_c = (t - 32)*5/9
	temps_c.append(round(degrees_c, 2)) # Round to 2 decimal places
	temps_c
	my_car = ("Honda Fit", 2010, 30, 10000)
	print(my_car)
	car_type, year, mpg, price = my_car
	print(mpg)
	print(my_car[0] + ' prints successfully') # OK
	my_car[0] = 'bad value' # Not OK, trying to change the tuple
	my_movies = [("No", 4), ("Rogue One", 4.5), ("Casablanca", 5)]
	for moviename, stars in my_movies: # Notice the two variable names
	print ('I would rate ' + moviename + ' ' + str(stars) + ' stars')
	my_movies = [("No", 4), ("Rogue One", 4.5), ("Casablanca", 5)]
	best_rating = 0 # Initialize with a value that is definitely beat
	best_movie = "none"
	for movie, rating in my_movies:
	if rating > best_rating:
	best_rating = rating
	best_movie = movie
	print("Best movie: " + best_movie + "...rating = " + str(best_rating))
	movies = ['Fall Guy', 'Free Guy', 'Cable Guy']
	ratings = [5, 4, 3]
	for movie, rating in zip(movies, ratings):
	print("I'd rate " + movie + " a " + str(rating))
	sw_movies = [('The Phantom Menace', 52),
	('Attack of the Clones', 65),
	('Revenge of the Sith', 80),
	('Rogue One', 84),
	('Solo', 70),
	('Star Wars', 92),
	('The Empire Strikes Back',94),
	('Return of the Jedi', 82),
	('The Force Awakens', 93),
	('The Last Jedi', 90),
	('The Rise of Skywalker', 51)]
	my_list = []
	for movie, score in sw_movies:
	if score >= 80:
	my_list.append(movie)
	print(my_list)
	for i in range(5):
	print ("Iteration " + str(i))
	for i in range(1,6):
	print(i)
	my_itinerary = ['Boston', 'Atlanta', 'LA', 'Seattle']
	for idx in range(len(my_itinerary)-1): # Avoid indexing out of bounds
	print(my_itinerary[idx] + '-' + my_itinerary[idx+1])
	names = ['Alice', 'Bob', 'Charlie', 'Dora']
	for number, name in enumerate(names):
	print(name + ' ' + str(number))
	for movie, rating in sw_movies:
	print('Looking at ' + movie)
	if movie == 'Rogue One':
	print('The rating of Rogue One is ' + str(rating))
	break # We don't need to look at any other entries
	print('Done')
	my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2],
	[100.2, 99.9, 100.0, 103.1]]
	my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2],
	[100.2, 99.9, 100.0, 103.1]]
	my_two_stock_histories[1]
	my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2],
	[100.2, 99.9, 100.0, 103.1]]
	my_two_stock_histories[1][2]
	my_stock_histories = my_two_stock_histories.copy()
	my_stock_histories.append([5.0, 9.0, 6.0, 7.0])
	print(my_stock_histories)
	print('Stock 0 closing prices: ')
	for price in my_stock_histories[0]:
	print(price)
	print('Starting prices for all stocks:')
	for stock_list in my_stock_histories:
	print(stock_list[0])
	letters = ['a', 'b', 'c','d','e','f','g','h','i','j']
	print('All possible coordinates in Battleship:')
	for l in letters:
	for n in range(1,11):
	print(l + str(n))
	bills = [[1, 2, 3], [4,5,6], [7,8,9]]
	my_totals = [] # empty list
	for l in bills:
	print('new list')
	listsum = 0
	for l2 in l: # iterating over the list we got from the outer foreach
	print('adding ' + str(l2))
	listsum += l2
	my_totals.append(listsum)
	print('Bill sums:' + str(my_totals))
	print('Possible matchups:')
	players = ['Alice', 'Bobby', 'Caspar', 'Dmitri', 'Eve']
	for white_player in players:
	for black_player in players:
	print("White: " + white_player + "; Black player: " + black_player)
	print('Possible matchups:')
	players = ['Alice', 'Bobby', 'Caspar', 'Dmitri', 'Eve']
	for white_player in players:
	for black_player in players:
	if not white_player == black_player:
	print("White: " + white_player + "; Black player: " + black_player)
	my_multiples_of_3 = [v * 3 for v in range(5)]
	my_multiples_of_3
	unrounded = [1.9, 5.3, 9.9]
	rounded = [round(i,0) for i in unrounded]
	rounded
	unrounded = [1.9, 5.3, 9.9]
	rounded = []
	for item in unrounded:
	rounded.append(round(item,0))
	print(rounded)
	temps_f = [36, 39, 45, 56, 66, 76, 81, 80, 72, 61, 51, 41] # Jan through Dec
	temps_c = [round((t-32)*5/9,2) for t in temps_f]
	temps_c
	times = [(2,30), (4,10), (1, 30), (0,40), (0, 20)]
	minutes = [t[0]*60 + t[1] for t in times]
	minutes
	# Lecture8and9Functions.py
	def add_an_s(string):
	    """Return `string` with the letter 's' appended to its end."""
	    return string + 's'
	add_an_s('example') + '!'
	records = read_customer_data('input.csv')
	sales = 0
	purchase_counts = []
	s_names = []
	for record in records:
	name, purchase_list, sale_info = parse_record(record)
	s_names.append(standardize_name(name))
	sales = update_total_sales(sales, sale_info)
	update_purchase_counts(purchase_counts, purchase_list)
	write_to_file(s_names, purchase_counts, sales, 'output.csv')
	def add_two(my_number):
	    """Return my_number increased by two."""
	    increment = 2
	    return my_number + increment
	add_two(2)
	def count_matches(to_match, my_list):
	    """Return how many elements of my_list compare equal to to_match."""
	    return sum(1 for candidate in my_list if to_match == candidate)
	print(count_matches(5, [5, 6, 7, 5]))
	print(count_matches("foo", ["foo","bar","baz"]))
	def percent_gain(start, finish):
	    """Return the change from start to finish as a percentage of start.

	    The result is negative when finish is smaller than start.
	    Raises ZeroDivisionError when start is 0.
	    """
	    change = finish - start
	    return change / start * 100
	print(percent_gain(36585.06, 33147.25))
	print(percent_gain(4796.56, 3839.50))
	print(percent_gain(15832.80, 10466.48))
	def get_rating(movie_tuple):
	    """Return the rating stored at index 1 of a (name, rating) movie tuple."""
	    rating_position = 1
	    return movie_tuple[rating_position]
	get_rating(('Portrait of a Lady on Fire', 5))
	def with_tax(price, tax):
	    """Return price plus tax, rounded to 2 decimal places.

	    tax is a percentage (8.6 means 8.6%), not a fraction.
	    """
	    multiplier = 1 + tax * .01
	    return round(price * multiplier, 2)
	with_tax(1,8.6)
	from datetime import date
	def greet_user():
	    """Print a greeting followed by today's date; returns None."""
	    print("Hello, user!")
	    today_text = str(date.today())
	    print("Today's date is " + today_text)
	greet_user()
	def greet_user():
	    """Print a greeting followed by today's date, then return None explicitly."""
	    print("Hello, user!")
	    today_text = str(date.today())
	    print("Today's date is " + today_text)
	    # A bare `return` and `return None` are the same thing.
	    return None
	print(greet_user())
	def longest_customer_name(list_of_names):
	    """Return (name, length) for the longest name in list_of_names.

	    On a tie the earliest name wins. An empty list gives ("", 0).
	    """
	    best = ("", 0)
	    for candidate in list_of_names:
	        size = len(candidate)
	        if size <= best[1]:
	            continue  # not strictly longer, keep the current best
	        best = (candidate, size)
	    return best
	name, length = longest_customer_name(['Alice', 'Bob', 'Cassia'])
	print(name)
	print(length)
	from statistics import mean
	def min_mean_max(L):
	    """Return the tuple (minimum, mean, maximum) of the values in L."""
	    smallest = min(L)
	    average = mean(L)
	    largest = max(L)
	    return smallest, average, largest
	min_mean_max([1,2,3,4,5])
	def count_items(lst):
	    """Return len(lst), printing a warning when the list is empty."""
	    size = len(lst)
	    if size != 0:
	        print("We don't get here with an empty list")
	        return size
	    print('Warning: empty list passed to count_items!')
	    return 0
	count_items([])
	def is_prime(n):
	    """Return True if the integer n is prime, False otherwise.

	    Integers below 2 (0, 1 and all negatives) are not prime.
	    """
	    if n < 2:
	        return False
	    # Divisors come in pairs whose smaller member is at most sqrt(n),
	    # so checking candidates with i * i <= n is enough.
	    i = 2
	    while i * i <= n:
	        if n % i == 0:  # i divides n evenly, no remainder
	            return False
	        i += 1
	    return True  # didn't find a divisor
	print(is_prime(11))
	print(is_prime(4))
	def longest_customer_name(list_of_names):
	    """Return (name, length) for the longest name in list_of_names.

	    On a tie the earliest name wins. An empty list gives ("", 0).
	    """
	    winner, winner_len = "", 0
	    for candidate in list_of_names:
	        candidate_len = len(candidate)
	        if candidate_len > winner_len:
	            winner, winner_len = candidate, candidate_len
	    return winner, winner_len
	def count_matches(to_match, my_list):
	    """Return how many elements of my_list compare equal to to_match."""
	    hits = [element for element in my_list if to_match == element]
	    return len(hits)
	def count_longest_name(list_of_names):
	    """Return how many times the longest name occurs in list_of_names.

	    Delegates to longest_customer_name and count_matches.
	    """
	    # Only the name is needed; the length half of the pair is ignored.
	    longest, _ = longest_customer_name(list_of_names)
	    return count_matches(longest, list_of_names)
	count_longest_name(['Alice','Bob','Catherine','Catherine'])
	def all_names_short_enough1(names, limit):
	    """Return True when no name in names is longer than limit characters."""
	    return all(len(name) <= limit for name in names)
	print(all_names_short_enough1(['Alice', 'Bob'], 3))
	print(all_names_short_enough1(['Alice', 'Bob'], 5))
	def all_names_short_enough2(names, limit):
	    """Return True when the longest name in names is at most limit long.

	    Delegates to longest_customer_name.
	    """
	    longest_pair = longest_customer_name(names)
	    return longest_pair[1] <= limit
	print(all_names_short_enough2(['Alice', 'Bob'], 3))
	print(all_names_short_enough2(['Alice', 'Bob'], 5))
	def add5(arg):
	    """Return arg plus five."""
	    return arg + 5
	add5(7) # Return 12
	def pattern_a(price, tax):
	    """Return price with tax added; tax is a percentage passed as an argument."""
	    multiplier = 1 + 0.01 * tax
	    return price * multiplier
	tax = 20  # Module-level tax percentage, read by pattern_b below
	def pattern_b(price):
	    """Return price with tax added, using the module-level `tax` percentage."""
	    rate = 0.01 * tax  # reads the global, not an argument
	    return price * (1 + rate)
	print(pattern_a(100,20))
	print(pattern_b(100))
	def add_two(my_number):
	    """Return my_number + 2, printing the local variable `a` on the way."""
	    # This `a` is local to the function; assigning it does not touch
	    # any variable named `a` outside.
	    a = 2 + 0 + my_number if False else my_number + 2
	    print("a is ", a, " inside add_two", sep="")
	    return a
	a = 5
	print("add_two(2) is " + str(add_two(2)))
	print("a is " + str(a) + " outside add_two")
	my_list = ['a','b','c']
	def concatenate_all(my_list):
	    """Return all strings in my_list joined end to end ('' for an empty list)."""
	    return ''.join(my_list)
	print(concatenate_all(['d','e'])) # ['d','e'] is called my_list in the function
	print(concatenate_all(my_list)) # my_list is still a,b,c
	names = ["Catherine", "Donovan", "alice", "BOB"]
	standardized_names = []
	for name in names:
	name = name.capitalize() # Capitalize first letter, lc others
	standardized_names.append(name)
	standardized_names.sort()
	jobs = ['Pilot', 'teacheR', 'firefighter', 'LIBRARIAN']
	standardized_jobs = []
	for job in jobs:
	job = job.capitalize()
	standardized_jobs.append(job)
	standardized_jobs.sort()
	print(standardized_names)
	print(standardized_jobs)
	names = ["Catherine", "Donovan", "alice", "BOB"]
	jobs = ['Pilot', 'teacheR', 'firefighter', 'LIBRARIAN']
	def standardize_strings(string_list):
	    """Return a new sorted list of the strings in string_list, each capitalized.

	    str.capitalize upper-cases the first character and lower-cases the rest.
	    The input list is not modified.
	    """
	    return sorted(text.capitalize() for text in string_list)
	standard_names = standardize_strings(names)
	standard_jobs = standardize_strings(jobs)
	print(standard_names)
	print(standard_jobs)
	def get_first_letter(word):
	    """Return the first character of a string.

	    word (str): the string whose first character is wanted.

	    Demo-only helper: calling it is longer to type than word[0].
	    Raises IndexError when word is empty.
	    """
	    first_index = 0
	    return word[first_index]
	get_ipython().run_line_magic('pinfo', 'get_first_letter')
	print(get_first_letter("Shibboleth") == "S")
	print(pattern_a(100,20) == 120)
	print(pattern_a(0, 20) == 0)
	print(count_matches("A",[]) == 0)
	print(count_matches("A", ["A","A","A"]) == 3)
	# Lecture10Hashes.py
	my_menu_dict = {
	"Salmon": 25,
	"Steak": 30,
	"Mac and cheese" : 18
	}
	print(my_menu_dict["Salmon"])
	my_menu_dict = {} # empty dictionary
	my_menu_dict["Salmon"] = 25
	my_menu_dict["Steak"] = 30
	my_menu_dict["Mac and cheese"] = 18
	print(my_menu_dict["Salmon"])
	my_dict = {}
	my_dict.get('sushi', 0)
	two_cities = """It was the best of times, it was the worst of times,
	it was the age of wisdom, it was the age of foolishness, it was the epoch of belief,
	it was the epoch of incredulity, it was the season of light, it was the season of darkness,
	it was the spring of hope, it was the winter of despair."""
	worddict = {}
	wordlist = two_cities.split()
	for word in wordlist:
	if word in worddict: # Check for presence of key
	worddict[word] += 1
	else:
	worddict[word] = 1
	print(worddict["age"])
	print(worddict["of"])
	for word, count in worddict.items():
	print(word + ":" + str(count))
	def word_prob(word, worddict):
	    """Return the fraction of all counted words that are `word`.

	    word: the word to look up.
	    worddict (dict): maps each word to the number of times it was seen.
	    Returns: count of `word` (0 if absent) divided by the sum of all counts;
	    0.0 when the counts sum to zero (e.g. an empty dictionary).
	    """
	    total = sum(worddict.values())
	    if total == 0:
	        # Nothing was counted, so avoid dividing by zero.
	        return 0.0
	    return worddict.get(word, 0) / total
	print(word_prob('winter', worddict)) # Should be 1/60 = 0.0167 or so
	print(word_prob('season', worddict)) # Should be 2/60 = 0.0333 or so
	print(word_prob('Pokemon', worddict)) # Should be 0 with no errors
	bigIPs = {"209.85.231.104", "207.46.170.123", "72.30.2.43"}
	bigIPs.add("208.80.152.2")
	len(bigIPs)
	newset = set()
	newset.add("First item")
	print("First item" in newset)
	myset = set(range(123456789)) # {0, 1, 2, ...}
	mylist = list(range(123456789)) # [0, 1, 2, ...]
	12345678 in myset # Fast, uses hash
	12345678 in mylist # Slower, check each item
	two_cities_extended = """It was the best of times,
	it was the worst of times, it was the age of wisdom,
	it was the age of foolishness, it was the epoch of belief,
	it was the epoch of incredulity, it was the season of Light,
	it was the season of Darkness, it was the spring of hope,
	it was the winter of despair, we had everything before us,
	we had nothing before us, we were all going direct to Heaven,
	we were all going direct the other way--in short, the period was
	so far like the present period that some of its noisiest authorities
	insisted on its being received, for good or for evil, in the superlative
	degree of comparison only.
	There were a king with a large jaw and a queen with a plain face,
	on the throne of England; there were a king with a large jaw and a
	queen with a fair face, on the throne of France. In both countries
	it was clearer than crystal to the lords of the State preserves of
	loaves and fishes, that things in general were settled for ever.
	It was the year of Our Lord one thousand seven hundred and seventy-five.
	Spiritual revelations were conceded to England at that favoured period,
	as at this. Mrs. Southcott had recently attained her five-and-twentieth
	blessed birthday, of whom a prophetic private in the Life Guards had heralded
	the sublime appearance by announcing that arrangements were made for the
	swallowing up of London and Westminster. Even the Cock-lane ghost had been
	laid only a round dozen of years, after rapping out its messages, as the
	spirits of this very year last past (supernaturally deficient in originality)
	rapped out theirs. Mere messages in the earthly order of events had lately
	come to the English Crown and People, from a congress of British subjects
	in America: which, strange to relate, have proved more important to the human
	race than any communications yet received through any of the chickens of the
	Cock-lane brood.
	"""
	wordlist = two_cities_extended.split()
	def find_by_list(wordlist):
	    """Test every word of wordlist for membership in wordlist itself.

	    The membership results are discarded; the function returns None.
	    """
	    for candidate in wordlist:
	        _present = candidate in wordlist  # list membership test, result unused
	get_ipython().run_line_magic('time', 'find_by_list(wordlist)')
	worddict = {}
	for word in wordlist:
	if word in worddict:
	worddict[word] += 1
	else:
	worddict[word] = 1
	def find_by_dict(wordlist, dict):
	    """Test every word of wordlist for membership in the mapping `dict`.

	    The membership results are discarded; the function returns None.
	    Note: the parameter name `dict` hides the builtin inside this function.
	    """
	    for candidate in wordlist:
	        _present = candidate in dict  # key lookup, result unused
	get_ipython().run_line_magic('time', 'find_by_dict(wordlist,worddict)')
	mydict = {"a":1000}
	dict2 = mydict # gets the address, so any changes are permanent to the original
	dict2["b"] = 500
	print(mydict)
	print(dict2)
	dict3 = dict2.copy()
	dict3["c"] = 40
	print(dict2)
	print(dict3)
	from string import ascii_lowercase
	myset = set()
	for i in range(len(two_cities_extended)):
	myset.add(two_cities_extended[i].lower())
	def checkletters(myset):
	    """Report whether every lowercase ASCII letter is contained in myset.

	    Prints "Missing: <letter>" for the first absent letter (in alphabetical
	    order) and returns False; otherwise prints "All found" and returns True.
	    """
	    first_missing = next((c for c in ascii_lowercase if c not in myset), None)
	    if first_missing is not None:
	        print("Missing: " + first_missing)
	        return False
	    print("All found")
	    return True
	checkletters(myset)
	# Lecture11and12NumpyMatplotlib.py
	import numpy as np
	v = np.array([1, 2 ,3])
	print(v)
	A = np.array([[1, 0, 0],
	[0 ,2, 0],
	[0, 0, 3]]) # 3x3 with 1,2,3 along the diagonal
	print(A)
	print(A.shape) # Tuples: like lists, but use () instead of []
	print(v.shape) # 1d outputs a comma to indicate it's still a tuple
	v1 = v
	print(v1)
	v2 = np.array([4, 5, 6])
	print(v2)
	print("Adding 1D arrays: ", v1 + v2)
	print("Subtracting 1D arrays: ", v1 - v2)
	print("Multiplying 1D arrays: ", v1 * v2)
	print("Dividing 1D arrays: ", v1 / v2)
	print(v1)
	print("Adding by a constant: ", v1 + 2)
	print("Subtracting by a constant: ", v1 - 2)
	print("Multiplying by a constant: ", v1 * 2)
	print("Dividing by a constant: ", v1 / 2)
	my_array = np.array([[1,2,3],
	[4,5,6]])
	print(np.min(my_array, axis=0))
	print(np.mean(my_array, axis=1))
	B = np.array([[3, 2],
	[4, -1]])
	w = np.array([1, -1])
	z = B @ w
	print(z)
	my_array = np.array([8, 6, 7, 5, 3, 0, 9])
	print(my_array[1:3]) # prints index 1 and 2, not 3
	print(my_array)
	print(my_array[1:])
	my_array[:3]
	my_matrix = np.array([[42.3, 71.1, 92],
	[40.7, 70.0, 85],
	[47.6, 122.0, 82]])
	print(my_matrix)
	two_by_two_square = my_matrix[1:, :2]
	print(two_by_two_square)
	no_last_column = my_matrix[:, :2] # no temperature
	print(no_last_column)
	import numpy as np
	a = np.array([0, 1, 2, 3, 4, 5])
	print(a)
	b = a[1:3]
	print(b)
	b[1] = 100 # modify the slice...
	print(a) # ...and see the original change
	print(np.zeros(3)) #create an array of zeros with length 3
	print(np.zeros((2, 3))) # create a 2x3 matrix of zeros
	import matplotlib.pyplot as plt
	x = [1, 2, 3]
	y = [1, 4, 9]
	plt.plot(x, y)
	plt.show()
	import numpy as np
	my_points = np.array([[2, 1],
	[3, 4],
	[5, 6]]) # Each list is a point
	print(my_points)
	plt.plot(my_points[:, 0], my_points[:,1]) # Slice to get x values separate from y values
	plt.show()
	plt.plot(my_points[:, 0], my_points[:, 1], 'ro') # 'r' is for red, 'o' is for circles
	plt.show()
	distances_millions_miles = [35, 67, 93, 142, 484, 889, 1790, 2880]
	plt.plot(np.arange(1, 9), distances_millions_miles, 'o')
	plt.show()
	np.arange(1,9)
	xpoints = np.linspace(0, 10, 100)
	ypoints = xpoints ** 2 + 1
	plt.plot(xpoints, ypoints)
	plt.show()
	plt.plot(my_points[:, 0], my_points[:, 1], 'ro')
	myfit_x = np.linspace(1, 5, 100)
	myfit_y = np.linspace(1.5, 5.5, 100) # Same y/x slope for all segments - so, a line
	plt.plot(myfit_x,myfit_y)
	plt.show()
	import matplotlib.pyplot as plt
	x = [1, 2, 3]
	y1 = [1, 2, 3]
	y2 = [3, 2, 1]
	plt.plot(x, y1, label='Sales')
	plt.plot(x, y2, label='Quality')
	plt.legend()
	plt.title('Trends')
	plt.grid(True)
	customers = ['Oliver', 'Sophia', 'Liam', 'Arielle', 'Noah']
	total_purchases = [56, 73, 24, 48, 88]
	plt.bar(customers, total_purchases)
	plt.xlabel("Customer name", fontsize=14)
	plt.ylabel("Total purchases", fontsize=14)
	plt.title("Total purchases for 5 Amazon customers", fontsize=16)
	plt.tick_params(axis='x', labelsize=14)
	plt.tick_params(axis='y', labelsize=14)
	plt.show()
	# Lecture13BiggerPrograms.py
	"""
	Compute f-measure for each item in a list.

	Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn)
	(these stand for true positive, false positive, etc)
	Returns: a list of floats, the f-measures.
	"""
	"""
	Compute the f-measure, a performance measure that ignores true negatives.

	Arguments: tp (int): the count of true positives
	fp (int): the count of false positives
	tn (int): the count of true negatives
	fn (int): the count of false negatives
	Returns: a float, the f-measure.
	"""
	def f_measures(stats_list):
	    """
	    Compute f-measure for each item in a list.

	    Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn)
	    (these stand for true positive, false positive, etc)
	    Returns: a list of floats, the f-measures, in the same order as stats_list
	    (an empty list when stats_list is empty).
	    """
	    # Collect every result so the function returns what its contract promises.
	    return [f_measure(tp, fp, tn, fn) for tp, fp, tn, fn in stats_list]
	def f_measures(stats_list):
	    """
	    Compute f-measure for each item in a list.

	    Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn)
	    (these stand for true positive, false positive, etc)
	    Returns: a list of floats, the f-measures, in the same order as stats_list
	    (an empty list when stats_list is empty).
	    """
	    # Return the whole list of results, not just the last value computed.
	    return [f_measure(tp, fp, tn, fn) for tp, fp, tn, fn in stats_list]
	def f_measure(tp, fp, tn, fn):
	    """
	    Compute the f-measure, a performance measure that ignores true negatives.

	    Arguments: tp (int): the count of true positives
	    fp (int): the count of false positives
	    tn (int): the count of true negatives (unused by the formula)
	    fn (int): the count of false negatives
	    Returns: a float, the f-measure; 0.0 when there are no true positives.
	    """
	    if tp == 0:
	        # With non-negative counts, tp == 0 is the only case in which
	        # precision, recall, or their sum would need a division by zero.
	        return 0.0
	    precision = tp/(tp + fp)
	    recall = tp/(tp + fn)
	    return (2 * precision * recall)/(precision + recall)
	def f_measure(precision, recall):
	    """
	    Compute the f-measure, a performance measure that ignores true negatives.

	    Arguments: precision (float): proportion of positive classifications that are correct
	    recall (float): proportion of positive examples that were found
	    Returns: a float, the f-measure; 0.0 when precision + recall is zero.
	    """
	    denominator = precision + recall
	    if denominator == 0:
	        # Both inputs are zero; report 0 instead of dividing by zero.
	        return 0.0
	    return (2 * precision * recall)/denominator
	def precision(tp, fp):
	    """Return tp / (tp + fp), or 0 when tp + fp is zero.

	    tp: the count of true positives
	    fp: the count of false positives
	    """
	    total = tp + fp
	    if total == 0:
	        return 0  # nothing was classified positive; avoid dividing by zero
	    return tp/total
	def recall(tp, fn):
	    """Return tp / (tp + fn), or 0 when tp + fn is zero.

	    tp: the count of true positives
	    fn: the count of false negatives
	    """
	    total = tp + fn
	    if total == 0:
	        return 0  # there were no positive examples; avoid dividing by zero
	    # The quotient must be returned; a bare expression would yield None.
	    return tp/total

	def f_measures(stats_list):
	    """
	    Compute f-measure for each item in a list.

	    Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn)
	    (these stand for true positive, false positive, etc)
	    Returns: a list of floats, the f-measures, in the same order as stats_list
	    (an empty list when stats_list is empty).
	    """
	    # tn is unpacked only to match the tuple shape; the f-measure ignores it.
	    # Return the whole list of results, not just the last value computed.
	    return [f_measure(precision(tp, fp), recall(tp, fn))
	            for tp, fp, tn, fn in stats_list]
	print(precision(4,4)) # Expect 0.5
	print(recall(4,4)) # Expect 0.5
	print(f_measure(1, 1)) # Expect 1
	def recall(tp, fn):
	    """Print and return tp / (tp + fn), or 0 when tp + fn is zero.

	    tp: the count of true positives
	    fn: the count of false negatives
	    """
	    total = tp + fn
	    value = 0 if total == 0 else tp/total
	    print(value)  # debugging output, kept from the original
	    # Printing alone leaves the caller with None, so return the value too.
	    return value

	recall(4,4)
	def recall(tp, fn):
	    """Print and return tp / (tp + fn), or 0 when tp + fn is zero.

	    tp: the count of true positives
	    fn: the count of false negatives
	    """
	    total = tp + fn
	    # Compute the quotient once and reuse it for both the print and the return.
	    value = 0 if total == 0 else tp/total
	    print(value)
	    return value

	recall(4,4)
	def f_measure(precision, recall):
	    """
	    Compute the f-measure, a performance measure that ignores true negatives.

	    Arguments: precision (float): proportion of positive classifications that are correct
	    recall (float): proportion of positive examples that were found
	    Returns: a float, the f-measure; 0.0 when precision + recall is zero.
	    """
	    denominator = precision + recall
	    if denominator == 0:
	        # Both inputs are zero; report 0 instead of dividing by zero.
	        return 0.0
	    return (2 * precision * recall)/denominator
	def precision(tp, fp):
	return tp/(tp + fp)
	def recall(tp, fn):
	return tp/(tp + fn)

	def f_measures(stats_list):
	"""
	Compute f-measure for each item in a list.

	Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn)
	(these stand for true positive, false positive, etc)
	Returns: a list of floats, the f-measures.
	"""
	out = []
	for tp, fp, tn, fn in stats_list:
	f = f_measure(precision(tp, fp), recall(tp, fn))
	out.append(f)
	return f
	print(precision(4,4)) # Expect 0.5
	print(recall(4,4)) # Expect 0.5
	print(f_measure(1, 1)) # Expect 1
	print(precision(0, 4)) # Expect 0
	print(precision(0, 0)) # Expect ... oh, I guess we didn't think about this. 0?
	print(precision(4, 0)) # Expect 1
	print(recall(0, 4)) # Expect 0
	print(recall(0, 0)) # Similarly to precision, let's return 0
	print(recall(4, 0)) # Expect 1
	print(f_measure(0, 0)) # Expect 0
	print(f_measure(0.5, 0.5)) # Expect 0.5
	def f_measure(precision, recall):
	    """
	    Compute the f-measure, a performance measure that ignores true negatives.

	    Arguments: precision (float): proportion of positive classifications that are correct
	      recall (float): proportion of positive examples that were found
	    Returns: a float, the f-measure.
	    Raises: ZeroDivisionError if precision + recall == 0.
	    """
	    return (2 * precision * recall)/(precision + recall)


	def precision(tp, fp):
	    """Return tp/(tp + fp), or 0 when there were no positive classifications."""
	    if tp + fp == 0:
	        return 0
	    return tp/(tp + fp)


	def recall(tp, fn):
	    """Return tp/(tp + fn), or 0 when there were no positive examples."""
	    if tp + fn == 0:
	        return 0
	    return tp/(tp + fn)


	def f_measures(stats_list):
	    """
	    Compute f-measure for each item in a list.

	    Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn)
	      (these stand for true positive, false positive, etc)
	    Returns: a list of floats, the f-measures (empty for an empty input).
	    """
	    out = []
	    for tp, fp, tn, fn in stats_list:
	        out.append(f_measure(precision(tp, fp), recall(tp, fn)))
	    # Return the accumulated list, not just the last score
	    return out
	print(precision(4,4)) # Expect 0.5
	print(recall(4,4)) # Expect 0.5
	print(f_measure(1, 1)) # Expect 1
	print(precision(0, 4)) # Expect 0
	print(precision(0, 0)) # Expect 0
	print(precision(4, 0)) # Expect 1
	print(recall(0, 4)) # Expect 0
	print(recall(0, 0)) # Similarly to precision, let's return 0
	print(recall(4, 0)) # Expect 1
	print(f_measure(0, 0)) # Expect 0
	print(f_measure(0.5, 0.5)) # Expect 0.5
	def f_measure(precision, recall):
	    """
	    Compute the f-measure, a performance measure that ignores true negatives.

	    Arguments: precision (float): proportion of positive classifications that are correct
	      recall (float): proportion of positive examples that were found
	    Returns: a float, the f-measure (0 when precision + recall == 0).
	    """
	    if precision + recall == 0:
	        return 0
	    return (2 * precision * recall)/(precision + recall)


	def precision(tp, fp):
	    """Return tp/(tp + fp), or 0 when there were no positive classifications."""
	    if tp + fp == 0:
	        return 0
	    return tp/(tp + fp)


	def recall(tp, fn):
	    """Return tp/(tp + fn), or 0 when there were no positive examples."""
	    if tp + fn == 0:
	        return 0
	    return tp/(tp + fn)


	def f_measures(stats_list):
	    """
	    Compute f-measure for each item in a list.

	    Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn)
	      (these stand for true positive, false positive, etc)
	    Returns: a list of floats, the f-measures (empty for an empty input).
	    """
	    out = []
	    for tp, fp, tn, fn in stats_list:
	        out.append(f_measure(precision(tp, fp), recall(tp, fn)))
	    # Return the accumulated list, not just the last score
	    return out
	print(precision(4,4)) # Expect 0.5
	print(recall(4,4)) # Expect 0.5
	print(f_measure(1, 1)) # Expect 1
	print(precision(0, 4)) # Expect 0
	print(precision(0, 0)) # Expect 0
	print(precision(4, 0)) # Expect 1
	print(recall(0, 4)) # Expect 0
	print(recall(0, 0)) # Similarly to precision, let's return 0
	print(recall(4, 0)) # Expect 1
	print(f_measure(0, 0)) # Expect 0
	print(f_measure(0.5, 0.5)) # Expect 0.5
	# Lecture14Pandas.py
	import pandas as pd
	import numpy as np
	s1 = pd.Series([-3, -1, 1, 3, 5])
	print(s1)
	print(s1.index)
	s1[:2] # First 2 elements
	print(s1[[2,1,0]]) # Elements out of order
	type(s1)
	s1[s1 > 0]
	s2 = pd.Series(np.random.rand(5), index=['a', 'b', 'c', 'd', 'e'])
	print(s2)
	print(s2.index)
	print(s2['a'])
	data = {'pi': 3.14159, 'e': 2.71828} # dictionary
	print(data)
	s3 = pd.Series(data)
	print(s3)
	my_array = s3.values
	print(my_array)
	import numpy as np
	my_data = np.array([[5, 5, 4],
	[2, 3, 4]])
	hotels = pd.DataFrame(my_data, index = ["Alice rating", "Bob rating"],
	columns = ["Hilton", "Marriott", "Four Seasons"])
	hotels
	from google.colab import files
	uploaded = files.upload() # pick starbucks_drinkMenu_expanded.csv
	get_ipython().system('ls')
	import pandas as pd
	df = pd.read_csv('starbucks_drinkMenu_expanded.csv', index_col = 'Beverage')
	df.head()
	sorted_df = df.sort_values(by = "Calories", ascending=False)
	sorted_df.head()
	hotels
	hotels['Hilton']
	# Accumulate under a name that does not shadow the built-in sum()
	rating_total = 0
	for rating in hotels['Hilton']:
	    rating_total += rating
	print('Average Hilton Rating: ' + str(rating_total/len(hotels['Hilton'])))
	hotels.loc['Bob rating']
	hotels.loc['Bob rating', 'Marriott']
	hotels.iloc[1, 1]
	print(hotels.iloc[0, 1:2])
	print(hotels.loc['Bob rating', ['Marriott', 'Hilton']])
	(df['Calories'] > 300)
	df[df['Calories'] > 300].head()
	df[(df['Calories'] > 300) & (df['Beverage_prep'] == 'Soymilk')].head()
	df['bad_fat'] = df['Trans_Fat_g'] + df['Saturated_Fat_g']
	df.head()
	size_ounces_dict = {'Short': 8, 'Tall': 12, 'Grande': 16, 'Venti': 20}
	ounces_list = []
	for drink in df['Beverage_prep']:
	ounces_list.append(size_ounces_dict.get(drink, -1))
	df['ounces'] = ounces_list
	df.head()
	def size_to_ml(size_name):
	    """Convert a drink size name to millilitres; unknown names give 0 ounces."""
	    ml_per_ounce = 29.5735
	    ounces_by_size = {'Short': 8, 'Tall': 12, 'Grande': 16, 'Venti': 20}
	    ounces = ounces_by_size.get(size_name, 0)
	    return ounces * ml_per_ounce
	ml = df['Beverage_prep'].map(size_to_ml)
	print(ml)
	# Lecture15Pandas.py
	import pandas as pd
	df = pd.read_csv('starbucks_drinkMenu_expanded.csv', index_col = 'Beverage')
	df.head()
	print(df.loc[:, "Protein_g"].mean())
	print(df.loc[:, "Protein_g"].max())
	print(df.loc[:, "Protein_g"].idxmax()) # "argmax," gives index with biggest value
	df.describe()
	df.corr(numeric_only=True) # New to pandas 2.0.0: chokes on strings without added arg
	df.columns
	df.dtypes
	string = 'string'
	string[:-1]
	df['Vitamin_A'] = df['Vitamin_A'].str[0:-1] # Remove the % at the end
	df['Vitamin_A']
	df['Vitamin_A'] = pd.to_numeric(df['Vitamin_A'])
	df.dtypes
	df['Vitamin_A'] = df['Vitamin_A'].astype('float64')
	df.dtypes
	df.corr(numeric_only=True)
	df.isnull().sum()
	df = df.dropna(axis=0, how="any") # Remove the offending row
	df.isnull().sum()
	calorie_max = 0
	best_name = ""
	for index, row in df.iterrows():
	if row['Calories'] > calorie_max:
	calorie_max = row['Calories']
	best_name = index
	print(best_name)
	protein = df.loc[:, "Protein_g"]
	protein.hist(bins=20); # Create a histogram with 20 equally spaced bins for the data
	subplot = df[["Protein_g", "Vitamin_A"]] # Notice another way to get desired columns
	subplot.boxplot(); # Boxplots give median value, middle 50% of data, and range of non-outliers
	from google.colab import files
	uploaded = files.upload() # pick titanic.csv
	df = pd.read_csv('titanic.csv', index_col = 'PassengerId')
	df.head()
	df.columns
	df.dtypes
	df.describe()
	df.corr(numeric_only=True)
	males = df[df['Sex'] == 'male']
	males.head()
	males.describe()
	females = df[df['Sex'] == 'female']
	females.describe()
	df['sex_numeric'] = df['Sex'] == 'female'
	df.corr(numeric_only=True)
	third_class = df[df['Pclass'] == 3]
	second_class = df[df['Pclass'] == 2]
	first_class = df[df['Pclass'] == 1]
	third_class['Survived'].hist();
	second_class['Survived'].hist();
	first_class['Survived'].hist();
	# Lecture16Strings.py
	my_cost = 12.95821
	print(f'The total cost was {my_cost} dollars')
	print(f'The total cost was {my_cost:.2f} dollars')
	groceries = "milk,eggs,yogurt"
	grocerieslist = groceries.split(',')
	print(grocerieslist)
	','.join(['milk', 'eggs', 'yogurt'])
	' milk,eggs,yogurt '.strip()
	lines = "SERVANT: Sir, there are ten thousand--\nMACBETH: Geese, villain?"
	linelist = lines.splitlines() # A shortcut for split('\n')
	for line in linelist:
	if line.startswith("MACBETH"):
	print(line.split(": ")[1])
	print('Wow\n\twow!')
	print("foo" in "food")
	print("foodfood".replace("foo", "ra"))
	import numpy as np
	import pandas as pd
	my_data = np.array([["Excellent", " Okay ", " Okay"], ["Great ", " Good", " Good"]])
	df = pd.DataFrame(my_data, columns = ["Hilton", "Marriott", "Four Seasons"], index = ["Alice", "Bob"])
	df
	marriott = df['Marriott']
	for s in marriott:
	    print(s)
	print('---')
	for s in marriott.str.strip():
	    print(s) # Look, no extra whitespace
	# Raw string: "\s" in a normal string literal is an invalid escape sequence
	marriott.str.match(r"\sOkay\s")
	import re
	# Regex patterns below are raw strings so backslashes reach the regex
	# engine untouched instead of being treated as string escapes.
	pattern = '02143'
	longstring = 'Somerville, MA 02143'
	result = re.search(pattern, longstring)
	if result: # (if it's not None)
	    print(result.group())
	longstring = '0132428190214200'
	pattern2 = '02143'
	result2 = re.search(pattern2, longstring)
	print(result2)
	pattern3 = r'\d\d\d\d\d'
	longstring = 'Somerville, MA 02143'
	result3 = re.search(pattern3, longstring)
	if result3:
	    print(result3.group())
	longstring = 'My phone number is 5555555'
	pattern4 = r'phone number is \d+'
	result4 = re.search(pattern4, longstring)
	if result4:
	    print(result4.group())
	longstring = 'Call me at 555-5555'
	pattern5 = r'\d\d\d-?\d\d\d\d'
	result5 = re.search(pattern5, longstring)
	if result5:
	    print(result5.group())
	longstring = "Call me at 1-800-555-5555."
	pattern = r"(\d-)?(\d\d\d-)?\d\d\d-?\d\d\d\d"
	result = re.search(pattern, longstring)
	if result:
	    print(result.group())
	longstring2 = "Call me at 555-5555."
	result = re.search(pattern, longstring2)
	if result:
	    print(result.group())
	# An unescaped | is alternation; "\|" would only match a literal pipe
	pattern = r"Somerville, (MA|NJ)"
	longstring = "Somerville, NJ 02143"
	result = re.search(pattern, longstring)
	if result:
	    print(result.group())
	longstring = "States with a Somerville: AL, IN, ME, MA, NJ, OH, TN, TX"
	pattern = "[A-Z][A-Z]" # Get capital letters within A-Z range
	result = re.findall(pattern, longstring)
	print(result)
	longstring = "The stock NVDA went down 4.54 points"
	pattern = r"stock (\w+) went down (\d+\.\d+) points"
	result = re.search(pattern, longstring)
	if result:
	    print(result.group())
	    print(result.group(1)) # Subgroup 1, the first () in the pattern
	    print(result.group(2))
	import re
	longstring = "We paid $100 for those shoes"
	pattern = r'\$\d+'
	result = re.search(pattern, longstring)
	print(result.group())
	# Lecture18Objects.py
	class Car:
	pass
	car1 = Car()
	car2 = Car()
	car3 = Car()
	print(isinstance(car1,Car))
	car1.year = 2010
	car1.make = "Honda"
	car1.model = "Fit"
	car1.color = "blue"
	car2.year = 2013
	car2.make = "Toyota"
	car2.model = "Camry"
	car2.color = "silver"
	print(f"This car is a {car1.year} {car1.color} {car1.make} {car1.model}")
	my_car = (2010, 'Honda', 'Fit', 'blue')
	print(f"This car is a {my_car[0]} {my_car[3]} {my_car[1]} {my_car[2]}")
	class Car:
	    """A car whose year, color, make and model are assigned after creation."""

	    def print_facts(self):
	        """Print a one-line description built from the car's attributes."""
	        description = f"This car is a {self.year} {self.color} {self.make} {self.model}"
	        print(description)
	car1 = Car()
	car2 = Car()
	car1.year = 2010
	car1.make = "Honda"
	car1.model = "Fit"
	car1.color = "blue"
	car2.year = 2013
	car2.make = "Toyota"
	car2.model = "Camry"
	car2.color = "silver"
	car1.print_facts()
	car2.print_facts()
	class Car:
	    """A car described by its year, make, model and color."""

	    def __init__(self, year, make, model, color):
	        # It's common for the constructor's arguments
	        # to have similar or identical names to the attributes they set
	        # (but we still have to say one should be set to the other)
	        self.year = year
	        self.make = make
	        self.model = model
	        self.color = color

	    def print_facts(self):
	        """Print a one-line description of this car."""
	        print(f"This car is a {self.year} {self.color} {self.make} {self.model}")


	car1 = Car(2010, "Honda", "Fit", "blue")
	car2 = Car(2013, "Toyota", "Camry", "silver")
	car1.print_facts()
	car2.print_facts()


	def newest_car(list_of_cars):
	    """
	    Return the Car with the largest year (the earliest one on ties).

	    Non-Car items are skipped with a printed warning, wherever they appear
	    in the list. Returns None when the list holds no Car at all.
	    """
	    best_car = None
	    for car in list_of_cars:
	        # This warning message could prevent a bug if we try
	        # to hand this function the wrong list
	        if not isinstance(car, Car):
	            print('Warning, list had non-car items!')
	        elif best_car is None or car.year > best_car.year:
	            # Checking the type before reading .year means a non-car in
	            # first position no longer raises AttributeError
	            best_car = car
	    return best_car
	newest_car([car1, car2]).print_facts()
	class Bill:
	    """ Represents a bill at a restaurant.
	    _items (list of tuples): list of (item name, cost) tuples
	    """

	    def __init__(self, items):
	        self._items = items

	    # "Getter"
	    def items(self):
	        """Return the stored list of (item name, cost) tuples."""
	        return self._items

	    # "Setter"
	    def set_items(self, items):
	        """Replace the stored list of (item name, cost) tuples."""
	        self._items = items

	    def total_cost_pretax(self):
	        """Return the sum of all item costs."""
	        return sum(cost for _name, cost in self._items)

	    def total_cost_with_tax(self, tax_rate):
	        """Return the pretax total scaled by (1 + tax_rate), rounded to 2 places."""
	        pretax = self.total_cost_pretax()
	        return round(pretax * (1 + tax_rate), 2)
	my_lunch = [("Ham Sandwich", 9), ("Coke", 2)]
	new_bill = Bill(my_lunch)
	cost_with_tax = new_bill.total_cost_with_tax(0.08)
	print(f"Total cost: {cost_with_tax}")
	new_bill.items() # could have said new_bill._items, but we were told not to
	class Bill:
	    """ Represents a bill at a restaurant.
	    _item_names (list of strings): list of items on bill
	    _item_costs (list of ints): list of prices of items on bill
	    _items is not here anymore! sorry anybody who wrote code that uses it, we warned you!
	    """

	    def __init__(self, items):
	        self._item_names = [item[0] for item in items]
	        self._item_costs = [item[1] for item in items]

	    # "Getter"
	    def items(self):
	        """Return the bill as a list of (item name, cost) tuples."""
	        # list(zip(a, b)) returns a list of tuples combining a and b
	        return list(zip(self._item_names, self._item_costs))

	    # "Setter"
	    def set_items(self, items):
	        """Replace the bill contents from a list of (item name, cost) tuples."""
	        self._item_names = [item[0] for item in items]
	        self._item_costs = [item[1] for item in items]

	    def total_cost_pretax(self):
	        """Return the sum of all item costs."""
	        # Read the new cost list; self._items no longer exists in this class
	        return sum(self._item_costs)

	    # Notice that we can call another method with this one
	    def total_cost_with_tax(self, tax_rate):
	        """Return the pretax total scaled by (1 + tax_rate), rounded to 2 places."""
	        return round(self.total_cost_pretax() * (1 + tax_rate), 2)
	my_lunch = [("Ham Sandwich", 9), ("Coke", 2)]
	new_bill = Bill(my_lunch)
	print(new_bill.items()) # this still works, but _items would have broken
	class Circle:
	    """A circle that refuses to be built with a negative radius."""

	    def __init__(self, radius):
	        negative = radius < 0
	        if negative:
	            raise ValueError("Can't have negative circle radius")
	        self.radius = radius
	Circle(-1)
	class Circle2:
	    """A circle whose radius defaults to 2 when none is given."""

	    def __init__(self, radius=2):
	        self.radius = radius
	Circle2().radius
	class Student:
	    """A student with an age, a major and a class year."""

	    def __init__(self, age, major, year):
	        self.age, self.major, self.year = age, major, year

	    def get_older(self, amount):
	        """Increase the student's age by amount (mutates the object)."""
	        self.age = self.age + amount
	bob = Student(20,"Biology","Sophomore")
	bob.get_older(2)
	print(bob.age)
	car1 = Car(2010, "Honda", "Fit", "blue")
	car2 = car1
	car2.color = "black"
	car1.print_facts() # It's black now
	car2.print_facts()
	import copy
	car2 = copy.copy(car1)
	car2.color = "white"
	car1.print_facts()
	car2.print_facts()
	from google.colab import files
	uploaded = files.upload() # import books.csv
	import pandas as pd
	df = pd.read_csv('books.csv', index_col = 'title')
	df.head()
	class Book:
	    """One book: its title, author and average rating."""

	    def __init__(self, title, author, average_rating):
	        self.title = title
	        self.author = author
	        self.average_rating = average_rating
	        # Could add more fields from the dataset if desired


	class Publisher:
	    """A publisher name plus the Book objects built from its rows of df."""

	    def __init__(self, df, publisher_name):
	        """
	        Collect the rows of df whose publisher column equals publisher_name.

	        The row index is used as the book title; the authors and
	        average_rating columns supply the remaining Book fields.
	        """
	        self.name = publisher_name
	        self.books = [
	            Book(row.Index, row.authors, row.average_rating)
	            for row in df.itertuples()
	            if row.publisher == publisher_name
	        ]

	    def average_rating(self):
	        """Return the mean of the books' average ratings, or 0 if there are none."""
	        # A publisher absent from the data has no books; return 0 rather
	        # than dividing by zero (same convention as precision()/recall())
	        if not self.books:
	            return 0
	        total = sum(book.average_rating for book in self.books)
	        return total/len(self.books)
	scholastic = Publisher(df,'Scholastic Inc.')
	scholastic.average_rating()
	# Lecture19MoreOO.py
	class Client: # both Faculty and Students
	    """Shared data for every client: a birth year and a uid."""

	    def __init__(self, birthyear, uid):
	        self.birthyear, self.uid = birthyear, uid

	    def get_uid(self):
	        """Return the client's uid."""
	        return self.uid

	    def get_birthyear(self):
	        """Return the client's birth year."""
	        return self.birthyear


	class Student(Client): # inherit from Client
	    """A Client that also records a graduation year."""

	    def __init__(self, birthyear, uid, gradyear):
	        # Sets the Client fields directly instead of calling Client.__init__
	        self.birthyear, self.uid = birthyear, uid
	        self.gradyear = gradyear

	    def get_gradyear(self):
	        """Return the student's graduation year."""
	        return self.gradyear


	class Faculty(Client):
	    """A Client with no extra data or behavior."""
	    pass # Nothing else we want to do for Faculty

	alice = Student(2003, 123456789, 2024)
	print(alice.get_birthyear()) # Inherited from Client
	print(alice.get_uid()) # Inherited from Client
	print(alice.get_gradyear()) # Specific to Student
	person1 = Student(2000,123456,2025)
	if not isinstance(person1, Faculty):
	print("Hey, this person doesn't have permission to do this!")
	else:
	print("Welcome, Faculty number " + str(person1.uid) + "!")
	student1 = Student(2000,123456,2025)
	print(isinstance(student1,Student))
	print(isinstance(student1,Client))
	print(isinstance(student1,object)) # Every class inherits from object
	class Student(Client): # inherit from Client
	    """A Client that also records a graduation year."""

	    def __init__(self, birthyear, uid, gradyear):
	        # Let Client.__init__ store the shared fields
	        super().__init__(birthyear, uid)
	        self.gradyear = gradyear

	    def get_gradyear(self):
	        """Return the student's graduation year."""
	        # self is required: without it, calling the method on an instance
	        # raises TypeError
	        return self.gradyear
	bob = Student(2002,987654321,2022)
	print(bob.get_uid()) # inherited from Client
	class Trip:
	    """A travel expense: a cost, a start/end date pair, and a reimbursed flag."""

	    def __init__(self, cost, start_date, end_date):
	        # Stored privately: an instance attribute named cost would hide
	        # the cost accessor defined on the class
	        self._cost = cost
	        self.start_date = start_date
	        self.end_date = end_date
	        self.reimbursed = False

	    @property
	    def cost(self):
	        """The cost of the trip (read as trip.cost)."""
	        return self._cost

	    @cost.setter
	    def cost(self, value):
	        self._cost = value

	    def reimburse(self):
	        """Mark the trip as reimbursed."""
	        self.reimbursed = True

	    def dates(self):
	        """Return the (start_date, end_date) pair."""
	        # Use the attribute names __init__ actually sets
	        return self.start_date, self.end_date
	class EquipmentOrder:
	    """An equipment expense: a cost, a domestic-seller value, and a reimbursed flag."""

	    def __init__(self, cost, domestic_seller):
	        # Stored privately: instance attributes named cost/domestic_seller
	        # would hide the accessors of the same name defined on the class
	        self._cost = cost
	        self.reimbursed = False
	        self._domestic_seller = domestic_seller

	    @property
	    def cost(self):
	        """The cost of the order (read as order.cost)."""
	        return self._cost

	    @cost.setter
	    def cost(self, value):
	        self._cost = value

	    def reimburse(self):
	        """Mark the order as reimbursed."""
	        self.reimbursed = True

	    @property
	    def domestic_seller(self):
	        """The domestic_seller value given at construction."""
	        return self._domestic_seller

	    @domestic_seller.setter
	    def domestic_seller(self, value):
	        self._domestic_seller = value
	class Expense:
	    """Base class for reimbursable expenses: a cost and a reimbursed flag."""

	    def __init__(self, cost):
	        # Stored privately: an instance attribute named cost would hide
	        # the cost accessor defined on the class
	        self._cost = cost
	        self.reimbursed = False

	    @property
	    def cost(self):
	        """The cost of the expense (read as expense.cost)."""
	        return self._cost

	    @cost.setter
	    def cost(self, value):
	        self._cost = value

	    def reimburse(self):
	        """Mark the expense as reimbursed."""
	        self.reimbursed = True


	class Trip(Expense):
	    """An Expense with a start and end date."""

	    def __init__(self, cost, start_date, end_date):
	        super().__init__(cost)
	        self.start_date = start_date
	        self.end_date = end_date

	    # inherit cost, reimburse
	    def dates(self):
	        """Return the (start_date, end_date) pair."""
	        return self.start_date, self.end_date


	class EquipmentOrder(Expense):
	    """An Expense that also records a domestic_seller value."""

	    def __init__(self, cost, domestic_seller):
	        super().__init__(cost)
	        # Private for the same reason as Expense._cost
	        self._domestic_seller = domestic_seller

	    # inherit cost, reimburse
	    @property
	    def domestic_seller(self):
	        """The domestic_seller value given at construction."""
	        return self._domestic_seller

	    @domestic_seller.setter
	    def domestic_seller(self, value):
	        self._domestic_seller = value
	class Employee:
	    """An employee with a name, salary, title and years of service."""

	    def __init__(self, name, salary, title, years_of_service):
	        self.name, self.salary = name, salary
	        self.title, self.years_of_service = title, years_of_service

	    def give_raise(self, raise_amount):
	        """Add raise_amount to the salary."""
	        self.salary = self.salary + raise_amount

	    def change_title(self, new_title):
	        """Replace the title."""
	        self.title = new_title

	    def update_years_of_service(self, increase):
	        """Add increase to the years of service."""
	        self.years_of_service = self.years_of_service + increase
	class Contractor:
	    """A contractor with a name, salary and contract duration."""

	    def __init__(self, name, salary, contract_duration):
	        self.name, self.salary = name, salary
	        self.contract_duration = contract_duration

	    def give_raise(self, raise_amount):
	        """Add raise_amount to the salary."""
	        self.salary = self.salary + raise_amount

	alice = Employee("Alice", 90000, "Manager", 7)
	alice.give_raise(10000)
	print(alice.salary)
	bob = Contractor("Bob", 80000, 2)
	bob.give_raise(10000)
	print(bob.salary)
	class Worker:
	    """What every worker shares: a name, a salary, and the ability to get a raise."""

	    def __init__(self, name, salary):
	        self.name, self.salary = name, salary

	    def give_raise(self, raise_amount):
	        """Add raise_amount to the salary."""
	        self.salary = self.salary + raise_amount


	class Employee(Worker):
	    """A Worker with a title and years of service."""

	    def __init__(self, name, salary, title, years_of_service):
	        super().__init__(name, salary)
	        self.title, self.years_of_service = title, years_of_service

	    def change_title(self, new_title):
	        """Replace the title."""
	        self.title = new_title

	    def update_years_of_service(self, increase):
	        """Add increase to the years of service."""
	        self.years_of_service = self.years_of_service + increase


	class Contractor(Worker):
	    """A Worker with a contract duration."""

	    def __init__(self, name, salary, contract_duration):
	        super().__init__(name, salary)
	        self.contract_duration = contract_duration

	alice = Employee("Alice", 90000, "Manager", 7)
	alice.give_raise(10000)
	print(alice.salary)
	bob = Contractor("Bob", 80000, 2)
	bob.give_raise(10000)
	print(bob.salary)
	class Gradyear:
	    """Holds a graduation year; defines no __str__ of its own."""

	    def __init__(self, year):
	        self.year = year
	year = Gradyear(2024)
	print(year)
	class Gradyear:
	    """Holds a graduation year and displays as that year."""

	    def __init__(self, year):
	        self.year = year

	    def __str__(self): # Our own implementation
	        year_text = str(self.year)
	        return year_text
	gradyear = Gradyear(2024)
	print(gradyear)
	gy1 = Gradyear(2024)
	gy2 = Gradyear(2024)
	print(gy1 == gy2)
	myset = set()
	myset.add(gy1)
	myset.add(gy2)
	len(myset)
	class Gradyear:
	    """Holds a graduation year; equal years compare and hash alike."""

	    def __init__(self, year):
	        self.year = year

	    def __str__(self): # Our own implementation
	        return str(self.year)

	    def __eq__(self, other):
	        # Comparing with something that is not a Gradyear should answer
	        # "not equal" instead of raising AttributeError on other.year
	        if not isinstance(other, Gradyear):
	            return NotImplemented
	        return self.year == other.year

	    def __hash__(self):
	        # hash() of an int is the int itself for ordinary years, and this
	        # also works when year is some other hashable value
	        return hash(self.year)
	gy1 = Gradyear(2024)
	gy2 = Gradyear(2024)
	print(gy1 == gy2)
	myset = set()
	myset.add(gy1)
	myset.add(gy2)
	len(myset)
	# Lecture20Recursion.py
	def bad_recursion():
	    """Print "Bad!" and call itself again; there is no base case."""
	    print("Bad!")
	    # No stopping condition, so this never returns normally
	    return bad_recursion()
	bad_recursion()
	def factorial(n):
	    """
	    Return n! computed recursively.

	    Raises ValueError for negative n (the recursion could never stop).
	    """
	    if n < 0:
	        raise ValueError("factorial is undefined for negative numbers")
	    # n <= 1 covers 0! == 1 as well as the 1! base case
	    if n <= 1:
	        return 1
	    return n * factorial(n-1)
	print (factorial(4))
	def factorial(n):
	    """
	    Return n! computed recursively, printing a trace of each call and return.

	    Raises ValueError for negative n (the recursion could never stop).
	    """
	    if n < 0:
	        raise ValueError("factorial is undefined for negative numbers")
	    print(f'Evaluating {n}!')
	    # n <= 1 covers 0! == 1 as well as the 1! base case
	    if n <= 1:
	        print('Returning 1')
	        return 1
	    result = n * factorial(n-1)
	    print(f'Returning {result}')
	    return result
	print (factorial(4))
	def sum_m_to_n(m, n):
	    """Return m + (m+1) + ... + n recursively; an empty range (n < m) sums to 0."""
	    # Without this guard, n < m would recurse forever
	    if n < m:
	        return 0
	    if n == m:
	        return m
	    return n + sum_m_to_n(m, n-1)
	sum_m_to_n(3, 7) # 3 + 4 + 5 + 6 + 7 = 25
	def sum_m_to_n(m, n):
	    """
	    Return m + (m+1) + ... + n recursively, printing a trace of each step.

	    An empty range (n < m) sums to 0.
	    """
	    print(f'Evaluating sum from {m} to {n}')
	    # Without this guard, n < m would recurse forever
	    if n < m:
	        print('Returning 0')
	        return 0
	    if n == m:
	        print(f'Returning {m}')
	        return m
	    result = n + sum_m_to_n(m, n-1)
	    print(f'Returning {result}')
	    return result
	sum_m_to_n(3, 7) # 3 + 4 + 5 + 6 + 7 = 25
	def mypow(a, p):
	    """
	    Return a raised to the integer power p, computed recursively.

	    A negative p gives the reciprocal of a ** -p (ZeroDivisionError if a is 0).
	    """
	    # Without this branch, a negative exponent would recurse forever
	    if p < 0:
	        return 1 / mypow(a, -p)
	    if p == 0:
	        return 1
	    return a * mypow(a, p-1)
	mypow(2,8)
	def mypow(a, p):
	    """
	    Return a raised to the integer power p recursively, printing a trace.

	    A negative p gives the reciprocal of a ** -p (ZeroDivisionError if a is 0).
	    """
	    print(f'Evaluating {a}^{p}')
	    if p < 0:
	        # Without this branch, a negative exponent would recurse forever
	        result = 1 / mypow(a, -p)
	    elif p == 0:
	        print('Returning 1')
	        return 1
	    else:
	        result = a * mypow(a, p-1)
	    print(f'Returning {result}')
	    return result
	mypow(2,8)
	def fib(n):
	    """Return the n-th Fibonacci number (fib(0) = 0, fib(1) = 1) recursively."""
	    for base in (0, 1):
	        if n == base:
	            return base
	    older = fib(n-2)
	    newer = fib(n-1)
	    return newer + older
	for i in range(10):
	print(fib(i))
	def r_perm(r, n):
	    """
	    Return the product (r+1) * (r+2) * ... * n, i.e. n!/r!, recursively.

	    n == r gives the empty product 1. Raises ValueError when n < r.
	    """
	    # Without these guards, n <= r would recurse forever
	    if n < r:
	        raise ValueError("n must be at least r")
	    if n == r:
	        return 1
	    return n * r_perm(r, n-1)
	r_perm(5,7)
	def iter_factorial(n):
	    """Return n! using a loop instead of recursion (1 when n < 2)."""
	    product = 1
	    # Starting at 2 skips the no-op multiplication by 1
	    for factor in range(2, n+1):
	        product = product * factor
	    return product

	print(iter_factorial(4))
	import numpy as np
	def iter_fib(n):
	    """
	    Return the n-th Fibonacci number (iter_fib(0) = 0, iter_fib(1) = 1) with a loop.

	    Uses Python ints throughout, so large results stay exact (a float
	    buffer cannot represent every integer beyond 2**53).
	    Raises ValueError for negative n.
	    """
	    if n < 0:
	        raise ValueError("n must be non-negative")
	    previous, current = 0, 1
	    # After k steps, previous holds fib(k)
	    for _ in range(n):
	        previous, current = current, previous + current
	    return previous
	for i in range(10):
	print(iter_fib(i))
	def power_set(setstring):
	    """
	    Return every subset of the characters of setstring, each as a string.

	    Subsets containing the first character come first, followed by the
	    subsets that leave it out.
	    """
	    if not setstring:
	        return [""]
	    first, rest = setstring[0], setstring[1:]
	    # Recursive call gets all the subsets that don't involve the first character
	    without_first = power_set(rest)
	    # The starting character is either in the subset...
	    with_first = [first + subset for subset in without_first]
	    # ...or not.
	    return with_first + without_first
	power_set("abcd")
	def recursive_sum(lst):
	    """Return the sum of lst recursively; an empty list sums to 0."""
	    if lst:
	        head, tail = lst[0], lst[1:]
	        return head + recursive_sum(tail)
	    return 0
	recursive_sum([1,2,3])
	def recursive_filter(min_val, lst):
	    """Return a new list of the items of lst that are >= min_val, in order."""
	    if not lst:
	        return []
	    first, remaining = lst[0], lst[1:]
	    kept = [first] if first >= min_val else []
	    return kept + recursive_filter(min_val, remaining)
	recursive_filter(3, [1, 2, 3, 4, 5])
	def recursive_index(item, lst, index=0): # index tracks where we are in the list
	    """
	    Return the position of the first occurrence of item in lst, or None.

	    index is the position of lst[0] within the original list; it defaults
	    to 0 so callers can simply write recursive_index(item, lst).
	    """
	    if not lst:
	        return None # not found
	    if lst[0] == item:
	        return index
	    return recursive_index(item, lst[1:], index+1)
	recursive_index(5, [0, 1, 2, 5], 0)
	def recursive_skiplist(lst):
	    """Return every other item of lst, starting with the first."""
	    size = len(lst)
	    if size == 0:
	        return []
	    if size == 1:
	        return lst
	    kept, after_skip = [lst[0]], lst[2:]
	    return kept + recursive_skiplist(after_skip)
	recursive_skiplist([5,3,7,2,9])
	# Lecture21DataStructures.py
	class ll_node:
	    """One node of a singly linked list of numbers; next is None at the end."""

	    def __init__(self, num):
	        self.number = num
	        self.next = None

	    def append(self, num):
	        """Add num at the end of the list that starts at this node."""
	        if self.next is not None:
	            self.next.append(num) # Recursively append to rest of list
	        else:
	            self.next = ll_node(num) # End of the list - add the node

	    def contains(self, othernum):
	        """Return True if othernum is stored in this node or any later one."""
	        if self.number == othernum: # We found it
	            return True
	        if self.next is None: # We reached the end, didn't find it
	            return False
	        # Not here, there's more list - so, keep looking (recursively)
	        return self.next.contains(othernum)

	    def __str__(self):
	        """Return the numbers from this node onward, separated by spaces."""
	        here = str(self.number)
	        if self.next is None: # Last number
	            return here
	        # Print this and print the rest (more recursion)
	        return here + ' ' + str(self.next)
	mylist = ll_node(6)
	mylist.append(1)
	mylist.append(7)
	print(mylist)
	print('Contains 7: ' + str(mylist.contains(7)))
	print('Contains 5: ' + str(mylist.contains(5)))
	import numpy as np
	class dynamic_array: # Showing how Python lists work
	    """
	    A growable array backed by a fixed-size numpy buffer.

	    memory is the buffer, size is its capacity, and occupied is how many
	    slots hold appended values. str() shows the whole buffer, including
	    slots that are still unused.
	    """

	    def __init__(self, initial_size):
	        self.memory = np.zeros(initial_size)
	        self.occupied = 0
	        self.size = initial_size

	    def __str__(self):
	        return str(self.memory)

	    def append(self, val):
	        """Store val in the next free slot, doubling the buffer first if it is full."""
	        if self.occupied == self.size:
	            print('Resizing...')
	            # Doubling a capacity of 0 would stay 0, so grow to at least 1
	            new_size = max(1, self.size*2)
	            new_memory = np.zeros(new_size)
	            # A "hiccup" in running time as everything's copied
	            new_memory[:self.occupied] = self.memory[:self.occupied]
	            self.memory = new_memory
	            self.size = new_size
	        print('Adding ' + str(val))
	        self.memory[self.occupied] = val
	        self.occupied += 1
	my_array = dynamic_array(2)
	print(my_array)
	my_array.append(1)
	my_array.append(1)
	print(my_array)
	my_array.append(1)
	print(my_array)
	my_array.append(1)
	print(my_array)
	class FolderTree:
	    """A binary tree node holding a value plus optional left and right children."""

	    # binary left and right are its fields
	    def __init__(self, val):
	        self.val = val
	        self.left = None
	        self.right = None

	    def addLeft(self, node):
	        """Attach node as the left child."""
	        self.left = node

	    def addRight(self, node):
	        """Attach node as the right child."""
	        self.right = node

	    def find(self, v):
	        """Return True if v is stored at this node or anywhere below it."""
	        if self.val == v:
	            return True
	        # Search the left subtree first, then the right; a missing child
	        # is None and must be skipped before calling find() on it
	        for child in (self.left, self.right):
	            if child and child.find(v):
	                return True
	        return False
	leftleftchild = FolderTree("wow.exe")
	leftrightchild = FolderTree("xls.exe")
	rightleftchild = FolderTree("lec12.pdf")
	rightrightchild = FolderTree("lec14.pdf")
	leftparent = FolderTree("apps")
	rightparent = FolderTree("lecs")
	leftparent.addLeft(leftleftchild)
	leftparent.addRight(leftrightchild)
	rightparent.addLeft(rightleftchild)
	rightparent.addRight(rightrightchild)
	root = FolderTree("root")
	root.addLeft(leftparent)
	root.addRight(rightparent)
	print(root.find("wow.exe"))
	print(root.find("lec13.exe"))
	def count_nodes(tree):
	    """Return the number of nodes in the tree rooted at tree (0 for None)."""
	    if tree is None:
	        return 0
	    left_count = count_nodes(tree.left)
	    right_count = count_nodes(tree.right)
	    return 1 + left_count + right_count
	count_nodes(root)
	def calc_depth(tree):
	    """Return the number of edges on the longest path from tree down to a leaf."""
	    if tree is None:
	        return 0
	    children = (tree.left, tree.right)
	    if all(child is None for child in children):
	        return 0 # Leaf has depth 0 in its subtree
	    deepest_child = max(calc_depth(child) for child in children)
	    return 1 + deepest_child
	calc_depth(root)
	class BinarySearchTree:
	    """A binary tree node whose find() goes left for smaller values, right otherwise."""

	    # binary left and right are its fields
	    def __init__(self, val):
	        self.val = val
	        self.left = None
	        self.right = None

	    def addLeft(self, node):
	        """Attach node as the left child."""
	        self.left = node

	    def addRight(self, node):
	        """Attach node as the right child."""
	        self.right = node

	    def find(self, v):
	        """Return True if v is found, printing the direction taken at each step."""
	        if self.val == v:
	            return True
	        # Only one side can hold v, so pick it and ignore the other
	        if v < self.val:
	            branch, message = self.left, "Going Left"
	        else:
	            branch, message = self.right, "Going Right"
	        if not branch:
	            return False
	        print(message)
	        return branch.find(v)
	root = BinarySearchTree("m")
	leftparent = BinarySearchTree("f")
	rightparent = BinarySearchTree("q")
	leftleftchild = BinarySearchTree("a")
	leftrightchild = BinarySearchTree("h")
	rightleftchild = BinarySearchTree("o")
	rightrightchild = BinarySearchTree("u")
	leftparent.addLeft(leftleftchild)
	leftparent.addRight(leftrightchild)
	rightparent.addLeft(rightleftchild)
	rightparent.addRight(rightrightchild)
	root.addLeft(leftparent)
	root.addRight(rightparent)
	print(root.find("h"))
	print(root.find("d"))
	class infect_tree:
	    """A person in an infection tree."""

	    # name is a string, infects is a list of infect_tree's infected
	    def __init__(self, name, infects):
	        self.name, self.infects = name, infects
	jake = infect_tree('jake', [])
	eric = infect_tree('eric', [])
	fifi = infect_tree('fifi', [])
	ged = infect_tree('ged', [])
	hao = infect_tree('hao', [])
	idris = infect_tree('idris', [jake])
	bob = infect_tree('bob', [eric])
	che = infect_tree('che', [])
	daphne = infect_tree('daphne', [fifi, ged, hao, idris])
	alice = infect_tree('alice', [bob, che, daphne])
	def find_most_infections(my_tree):
	    """
	    Return (name, count) for the node with the most direct infections.

	    Searches my_tree and everything below it; on ties the node seen first
	    (the parent, then earlier children) is kept.
	    """
	    champion = (my_tree.name, len(my_tree.infects))
	    for child in my_tree.infects:
	        candidate = find_most_infections(child) # Recursion...
	        if candidate[1] > champion[1]:
	            champion = candidate
	    return champion
	find_most_infections(alice)
	def find_all_descendants(my_tree):
	    """Return the names of my_tree and everyone below it, parent before children."""
	    names = [my_tree.name]
	    for child in my_tree.infects:
	        names.extend(find_all_descendants(child)) # More recursion
	    return names
	find_all_descendants(daphne)
	# Lecture22ScikitLearn.py
	from sklearn.datasets import load_digits
	import matplotlib.pyplot as plt
	digits = load_digits()
	print(digits.data.shape) # Examples x 64 pixels
	import matplotlib.pyplot as plt
	plt.gray()
	plt.matshow(digits.images[0]) # Notice images[0] is 2D
	from warnings import simplefilter
	simplefilter(action='ignore', category=FutureWarning)
	from sklearn.neighbors import KNeighborsClassifier
	nbrs = KNeighborsClassifier(n_neighbors=3).fit(digits.data, digits.target)
	nbrs.score(digits.data, digits.target) # Find accuracy on the training dataset
	from sklearn.model_selection import train_test_split
	data_train, data_test, label_train, label_test = train_test_split(digits.data, digits.target, test_size=0.2)
	nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)
	nbrs.score(data_test,label_test)
	print(nbrs.predict(data_test[0:3]))
	def reshape_and_show(num, data_test):
	    """Reshape row num of data_test to 8x8 and display it with plt.matshow."""
	    flat_pixels = data_test[num]
	    plt.matshow(flat_pixels.reshape(8,8))
	reshape_and_show(0,data_test)
	reshape_and_show(1,data_test)
	reshape_and_show(2,data_test)
	from sklearn.datasets import fetch_lfw_people
	faces = fetch_lfw_people(min_faces_per_person = 100)
	plt.imshow(faces.images[5], cmap="gray")
	data_train, data_test, label_train, label_test = train_test_split(faces.data, faces.target, test_size=0.2)
	nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)

	nbrs.score(data_test,label_test)
	import random
	random.seed(110) # Set seed - comment this out to get different rolls
	print(random.randint(1,8)) # Normally produces random integer 1-8
	print(random.randint(1,8))
	data_train, data_test, label_train, label_test = train_test_split(faces.data,
	faces.target, test_size=0.2,
	random_state=110) # Set the seed
	nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)

	nbrs.score(data_test,label_test)
	from sklearn.model_selection import cross_val_score
	cross_val_score(nbrs, data_train, label_train)
	import numpy as np
	for i in range(1,10):
	nbrs = KNeighborsClassifier(n_neighbors=i)
	print(np.mean(cross_val_score(nbrs, data_train, label_train)))
	# Lecture23DecisionTrees.py
	import math
	yes_branch_entropy = 0
	no_branch_entropy = -0.2 * math.log(0.2,2) - 0.8 * math.log(0.8, 2)
	pr_yes = 5/2005
	pr_no = 2000/2005
	print(pr_yes * yes_branch_entropy + pr_no * no_branch_entropy)
	from sklearn.datasets import load_iris
	from sklearn.model_selection import train_test_split
	import numpy as np
	iris = load_iris()
	iris.feature_names
	iris.target_names
	iris.data[0]
	features_train, features_test, labels_train, labels_test = \
	train_test_split(iris.data, iris.target, test_size=0.1, random_state=110)
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.model_selection import cross_val_score
	dtree = DecisionTreeClassifier(criterion="entropy", random_state=110)
	dtree.fit(features_train, labels_train)
	dtree.score(features_test, labels_test) # Gives accuracy
	import matplotlib.pyplot as plt
	from sklearn import tree
	plt.figure(figsize=(14,10))
	tree.plot_tree(dtree, feature_names = iris.feature_names, class_names = iris.target_names)
	# Lecture24RandomForestsOnly.py
	from sklearn.datasets import load_iris
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.model_selection import train_test_split
	import numpy as np
	iris = load_iris()
	# The loaded dataset is accessed by key here (iris["..."]) rather than by attribute.
	iris["feature_names"]
	# Hold out 10% of the samples for testing, with a fixed seed for repeatability.
	features_train, features_test, labels_train, labels_test = \
	train_test_split(iris['data'], iris['target'],
	test_size=0.1,random_state=110)
	# Forest of 200 entropy-criterion trees; random_state fixed for repeatable results.
	irisforest = RandomForestClassifier(n_estimators=200,criterion="entropy",random_state=110)
	irisforest.fit(features_train, labels_train)
	# Score on the held-out split (value is discarded outside an interactive session).
	irisforest.score(features_test, labels_test)
	# Per-feature importance values learned by the fitted forest.
	irisforest.feature_importances_
	# Lecture25Regression.py
	import numpy as np
	# 33 evenly spaced values from 1984 through 2016, i.e. one entry per year.
	x = np.linspace(1984, 2016, 33)
	# One temperature value per year (33 entries); the message printed below
	# reports the fitted slope in degrees F per year.
	y = [48.0, 47.3, 47.2, 47.4, 47.2, 46.7,
	49.7, 49.6, 46.4, 47.3, 47.7, 47.8, 47.3, 47.4, 50.4, 49.8,
	47.5, 49.1, 49.4, 47.1, 47.6, 48.4, 50.1, 48.3, 48.6, 47.8,
	50.4, 49.7, 51.4, 48.8, 47.7, 48.5, 50.3]
	import matplotlib.pyplot as plt
	# Scatter the raw data ('o' = circle markers, no connecting line).
	plt.plot(x,y,'o')
	import sklearn.linear_model as lm
	from sklearn.linear_model import LinearRegression
	linear_model = LinearRegression()
	# Turn the 1-D year array into a single-column 2-D array (33 rows x 1 feature)
	# before fitting.
	x = x.reshape(-1,1)
	linear_model.fit(x,y)
	# Predictions on the training years, used to draw the fitted line.
	y_hat = linear_model.predict(x)
	plt.plot(x,y,'o')
	plt.plot(x,y_hat,'r')
	# coef_[0] is the slope for the single feature (year); intercept_ is the offset.
	print(f'The temperature is rising {linear_model.coef_[0]:.4f} degrees F per year')
	print(f'{linear_model.intercept_:.2f}')
	# Goodness of fit on the same data used for training (value is discarded
	# outside an interactive session).
	linear_model.score(x,y)
	# 33 values, the same length as the temperature list y.
	# NOTE(review): units and source of these series are not stated here — confirm.
	methane = np.array([12.81, 25.15, 38.06, 49.47, 60.24, 71.32,
	80.08, 94.14, 96.49, 100.32, 107.54, 111.50, 113.97, 120.26, 132.39, 134.82,
	133.30, 132.60, 135.91, 140.65, 135.76, 136.14, 138.11, 145.90, 152.41, 157.13,
	162.33, 167.15, 172.17, 177.86, 190.62, 200.65, 207.73])
	# Only 27 values, so the other two series are trimmed below to match.
	mass_co = [84, 82.7, 84.9, 81.7, 81.9, 79.2, 79.9, 85.9, 84.3, 81.9,
	82.9, 82.8,83.7, 85, 83.6, 85, 77.1, 80.4, 77.2, 70.6,
	72.0, 68.1, 61.9, 65.7, 63.8, 65.6, 63.9]
	# Drop the first 6 entries (1984-1989 when the series starts at 1984) so all
	# three series have 27 entries.
	y_from_90 = y[6:] # From the last example, these are the temperatures
	methane_from_90 = methane[6:]
	# Stack the two predictors as rows, then transpose to 27 rows x 2 columns
	# (column 0 = mass_co, column 1 = methane). This rebinds x.
	x = np.transpose(np.array([mass_co, methane_from_90]))
	x
	temp_model = LinearRegression()
	temp_model.fit(x,y_from_90)
	# One coefficient per column of x, in column order, followed by the intercept.
	print(temp_model.coef_)
	print(temp_model.intercept_)
	from sklearn.tree import DecisionTreeRegressor
	import numpy as np
	import matplotlib.pyplot as plt
	model = DecisionTreeRegressor() # no pruning of any kind, so expect overfitting
	# Rebuild the year column (1984..2016) as a single-column 2-D array.
	x = np.linspace(1984, 2016, 33)
	x = x.reshape(-1,1)
	y = [48.0, 47.3, 47.2, 47.4, 47.2, 46.7,
	49.7, 49.6, 46.4, 47.3, 47.7, 47.8, 47.3, 47.4, 50.4, 49.8,
	47.5, 49.1, 49.4, 47.1, 47.6, 48.4, 50.1, 48.3, 48.6, 47.8,
	50.4, 49.7, 51.4, 48.8, 47.7, 48.5, 50.3]
	# Train on the first 30 points only; the last 3 are never seen during fitting.
	xtrain = x[:30]
	ytrain = y[:30]
	model.fit(xtrain,ytrain)
	# Predict for all 33 points, training and held-out alike.
	yhat = model.predict(x)
	plt.plot(x,y,'o')
	# Predictions over the training range in the default color.
	plt.plot(x[:30],yhat[:30])
	# Starts at index 29 (the last training point) so the red segment joins the
	# training curve.
	plt.plot(x[29:],yhat[29:],'r') # Plot line to test predictions in red
	# Depth-limited tree that gets the previous entry's temperature as a second feature.
	model = DecisionTreeRegressor(max_depth=3)  # maybe overdoing it on the pruning
	x = np.linspace(1984, 2016, 33)
	# Lag y by one position: entry k holds y[k-1]. The first entry has no
	# predecessor and gets 0; the last y value is dropped.
	prev_value_features = [0, *y[:-1]]
	# Two rows (year, previous value) transposed into 33 rows x 2 columns.
	combined_features = np.array([x, prev_value_features]).T
	print(combined_features)
	# First 30 rows for training; ytrain comes from the code that precedes this block.
	xtrain = combined_features[:30]
	model.fit(xtrain, ytrain)
	yhat = model.predict(combined_features)
	plt.plot(x, y, 'o')
	plt.plot(x[:30], yhat[:30])
	plt.plot(x[29:], yhat[29:], 'r')
	from sklearn.ensemble import RandomForestRegressor
	# Same features and plots as before, swapping in a random forest with default settings.
	model = RandomForestRegressor()
	model.fit(xtrain,ytrain) # xtrain has the matrix we made in the previous code box
	# Predict for every row, including the ones after the first 30 that were not trained on.
	yhat = model.predict(combined_features)
	plt.plot(x,y,'o')
	plt.plot(x[:30],yhat[:30])
	# Red segment starts at index 29 so it connects to the training-range curve.
	plt.plot(x[29:],yhat[29:],'r')
	from sklearn.neighbors import KNeighborsRegressor
	# Same features and plots again, this time with a 3-nearest-neighbor regressor.
	model = KNeighborsRegressor(n_neighbors=3)
	model.fit(xtrain,ytrain) # xtrain has the matrix we made in the previous code box
	# Predict for every row, including the ones after the first 30 that were not trained on.
	yhat = model.predict(combined_features)
	plt.plot(x,y,'o')
	plt.plot(x[:30],yhat[:30])
	# Red segment starts at index 29 so it connects to the training-range curve.
	plt.plot(x[29:],yhat[29:],'r')
	# Lecture26ModernNLPandML.py
	import pandas as pd
	# Remote tab-separated file; read_csv fetches it directly from the URL.
	SST2_LOC = 'https://github.com/clairett/pytorch-sentiment-classification/raw/master/data/SST2/train.tsv'
	# header=None: the file has no header row, so the columns are labeled 0 and 1.
	# The code further down treats column 0 as the text and column 1 as the label.
	df = pd.read_csv(SST2_LOC, delimiter='\t', header=None)
	df
	import nltk
	from nltk.tokenize import word_tokenize
	nltk.download('punkt') # Name means 'period' in German; from Kiss and Strunk 2006
	# Recent NLTK releases load the tokenizer tables from the separate 'punkt_tab'
	# resource; without it word_tokenize raises LookupError. Fetching both keeps
	# the example working on old and new NLTK versions.
	nltk.download('punkt_tab')
	# Quick demonstration of how contractions, currency and punctuation are split.
	word_tokenize("I won't sell my cat for even $1,000,000,000.")
	def wordset(raw_text):
	    """Return the set of distinct tokens in ``raw_text``.

	    The text is lowercased before tokenizing, so tokens that differ only in
	    case collapse into one entry.

	    Args:
	        raw_text: The string to tokenize.

	    Returns:
	        A set of the lowercase tokens produced by ``word_tokenize``.
	    """
	    return set(word_tokenize(raw_text.lower()))
	def all_words_set(df_column):
	    """Collect the vocabulary of a text column and each row's token set.

	    Args:
	        df_column: An iterable of strings (e.g. a DataFrame column of text).

	    Returns:
	        A tuple ``(set_of_all, dict_of_all)`` where ``set_of_all`` is the union
	        of every row's tokens and ``dict_of_all`` maps each row's raw text to
	        that row's token set. Rows with identical text share one dict entry.
	    """
	    set_of_all = set()
	    dict_of_all = {}
	    for row in df_column:
	        textset = wordset(row)
	        # Update in place: set.union() would copy the whole accumulated
	        # vocabulary on every row, which is quadratic for no benefit.
	        set_of_all |= textset
	        dict_of_all[row] = textset
	    return set_of_all, dict_of_all
	def one_hot_columns(df_column):
	    """Build a 0/1 word-presence table for a column of text.

	    Args:
	        df_column: An iterable of strings (e.g. a DataFrame column of text).

	    Returns:
	        A DataFrame with one row per entry of ``df_column`` (in iteration
	        order, with a default integer index) and one column per distinct
	        token. A cell is 1 when that token occurs in that row's text, else 0.
	        Columns are in sorted token order so the layout is the same on every run.
	    """
	    all_words, all_tokenizations = all_words_set(df_column)
	    # Look each row's token set up once, by iterating the values. Indexing
	    # df_column[line_num] is label-based on a Series and breaks (or picks the
	    # wrong row) when the index is not the default 0..n-1.
	    row_wordsets = [all_tokenizations[row] for row in df_column]
	    # Iterating a set directly gives an order that can change between
	    # interpreter runs; sorting keeps the feature columns reproducible.
	    word_dict = {
	        word: [1 if word in textset else 0 for textset in row_wordsets]
	        for word in sorted(all_words)
	    }
	    # We can create a dataframe from a dictionary of column header
	    # to list of column values
	    return pd.DataFrame.from_dict(word_dict)
	# Column 0 of df is the text that gets tokenized into one 0/1 column per token.
	one_hot_cols = one_hot_columns(df.iloc[:,0])
	one_hot_cols
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	# Column 1 of df is the label the classifier learns to predict.
	labels = df[1]
	features = one_hot_cols
	# No test_size is given, so train_test_split uses its default proportions;
	# random_state keeps the split repeatable.
	X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state=42)
	clf = RandomForestClassifier(n_estimators=200, random_state=42)
	clf.fit(X_train, y_train)
	clf.score(X_test, y_test)
	# Column sums: how many rows each token appears in.
	one_hot_cols.sum()
	import gensim.downloader as api
	# Load the pretrained model by name through gensim's downloader.
	# NOTE(review): presumably a large download on first use — confirm before running.
	wv = api.load('word2vec-google-news-300')
	# Indexing by a word returns that word's embedding vector.
	wv['king']
	print(wv.most_similar('king')) # Prints words and cosines of angles with 'king'
	import numpy as np
	def find_cosine(vec1, vec2):
	    """Return the cosine of the angle between two vectors.

	    Args:
	        vec1: A 1-D array-like of numbers (NumPy array, list, tuple, ...).
	        vec2: A 1-D array-like with the same length as ``vec1``.

	    Returns:
	        The cosine of the angle between the vectors: 1 for the same direction,
	        0 for perpendicular, -1 for opposite directions. A zero-length vector
	        has no direction, so the result is ``nan`` in that case.
	    """
	    # Accept plain sequences as well as arrays; a list cannot be divided by
	    # a scalar directly.
	    vec1 = np.asarray(vec1)
	    vec2 = np.asarray(vec2)
	    # Scale vectors to both have unit length
	    unit_vec1 = vec1/np.linalg.norm(vec1)
	    unit_vec2 = vec2/np.linalg.norm(vec2)
	    # The dot product of unit vectors gives the cosine of their angle
	    return np.dot(unit_vec1,unit_vec2)
	# Compare the hand-written cosine with the model's own similarity() for the
	# same word pair (presumably the two values should agree — verify when run).
	print(find_cosine(wv['king'], wv['faucet']))
	wv.similarity('king', 'faucet')
	def find_avg_vector(txt, embedding):
	    """Average the embedding vectors of the words in ``txt``.

	    The text is tokenized as-is (no lowercasing). Tokens that are not in
	    ``embedding`` are skipped and do not count toward the average.

	    Args:
	        txt: The string to embed.
	        embedding: A word-to-vector mapping that supports ``word in embedding``
	            and ``embedding[word]`` returning a NumPy array.

	    Returns:
	        A pandas Series holding the element-wise mean of the found vectors.
	        If no token is found, a Series of zeros whose length is
	        ``embedding.vector_size`` when that attribute exists, otherwise 300.
	    """
	    words = word_tokenize(txt)
	    vec_sum = None
	    count = 0
	    for word in words:
	        if word in embedding:
	            count += 1
	            if vec_sum is not None:
	                vec_sum += embedding[word]
	            else:
	                # The embeddings are read-only unless you copy them
	                vec_sum = embedding[word].copy()
	    if vec_sum is None:
	        # Treat no word found in embedding as zero vector. Take the width from
	        # the embedding so other vector sizes work; 300 stays as the fallback.
	        dim = getattr(embedding, 'vector_size', 300)
	        return pd.Series(np.zeros((dim,)))
	    return pd.Series(vec_sum/count)
	# Single-sentence demonstration of the averaging helper.
	find_avg_vector('Long live the king and queen!', wv)
	# Because the helper returns a Series, apply() expands each text row into a
	# row of embedding components, giving a DataFrame.
	df_embeddings = df[0].apply(lambda txt: find_avg_vector(txt, wv))
	# Prefix the integer column labels with 'feature'; presumably this avoids a
	# clash with df's own columns 0 and 1 after the concat.
	df_embeddings.rename(columns=lambda x: 'feature'+str(x), inplace=True)
	# Place the embedding columns side by side with the original text and label.
	df_augmented = pd.concat([df, df_embeddings], axis=1)
	df_augmented
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	labels = df_augmented[1]
	# Skip the first two columns (text and label); everything after is a feature.
	features = df_augmented.iloc[:,2:]
	X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state=42)
	clf = RandomForestClassifier(n_estimators=200, random_state=42)
	clf.fit(X_train, y_train)
	clf.score(X_test, y_test)