Spaces:

klgold
/

ds110probs

Running

App Files Files Community

Kevin Gold committed on Nov 13, 2024

Commit

daebe13

1 Parent(s): fd80aaf

Lecture Python examples added

Browse files

Files changed (2) hide show

app.py +4 -1
master.py +2263 -0

app.py CHANGED Viewed

@@ -26,7 +26,10 @@ HF_TOKEN=os.environ.get("HF_TOKEN")
 PROFILES_URL = "https://huggingface.co/datasets/klgold/tutor_profiles"
 PROMPT_PREFIX="Give me a practice problem for an introductory course in python and data science that uses the following concepts: "
 prefix_length = len(PROMPT_PREFIX)
-PROMPT_SUFFIX = 'The first section of your response should say "PROBLEM" followed by the problem.  The second section of your response should say "SOLUTION" before the Python code that solves the problem.  The third section of your response should say "HINT" before a hint that would help with the one issue the student was most likely to get stuck on.'
 suffix_length = len(PROMPT_SUFFIX)
 #Currently get error on trying to create Repository - need email?
 repo = Repository(

 PROFILES_URL = "https://huggingface.co/datasets/klgold/tutor_profiles"
 PROMPT_PREFIX="Give me a practice problem for an introductory course in python and data science that uses the following concepts: "
 prefix_length = len(PROMPT_PREFIX)
+PROMPT_SUFFIX = ('The first section of your response should say "PROBLEM" followed by the problem.  The second section of your response should say "SOLUTION" before the Python code that solves the problem.  The third section of your response should say "HINT" before a hint that would help with the one issue the student was most likely to get stuck on.  Additionally, in your solution you must '  # adjacent literals: a single-quoted string cannot span lines
+                 'not use any Python keywords, syntax, or concepts that are not included in the Python examples that follow, which are all the examples provided in lecture:')
+with open('master.py', 'r', encoding='utf-8') as pythonfile:  # lecture examples = the constructs the model may use
+    PROMPT_SUFFIX += pythonfile.read()
 suffix_length = len(PROMPT_SUFFIX)
 #Currently get error on trying to create Repository - need email?
 repo = Repository(

master.py ADDED Viewed

	@@ -0,0 +1,2263 @@

+# Lecture2HelloWorldAndExpressions.py
+print('Hello, world!')
+print('Hello 1')
+print('Hello 2')
+print('Hello 3')
+print('Hello, world!')
+print(Hello, world!)
+print(Hello, world!) # Intentionally creates an error!
+print(1)         # Technically an expression
+print(1+2)       # Two operands and an operator make an expression
+print(10*(10+1)) # The expression (10+1) acting as an operand
+print(3 + 8 / 2)  # What do you predict?
+print(4 * 2 + 3 + 5 * 2)  # And this one?
+print('Hello', 'world', '!')
+print(max(2,5,7))
+print(max(2,7) + max(3,9)) # Using function calls as operands
+print(max(2,7) + max(3,9)) # Calc 7, calc 9, then add
+1
+2
+3
+max(2,7)
+None
+print(2) + 2
+print('Hello, world!')
+max(2 ** 8, 3 ** 6, 5 ** 3)
+1.0000000000000001 - 1
+print(type(-100)) # int
+print(type(10.1)) # float
+print(type('A'))  # str
+print(type(True)) # bool
+print(type('10')) # str
+print(type(10))   # int
+print(type(10.0)) # float
+print(type(True)) # bool
+0.1 + 0.1 + 0.1
+'Hello ' + 1111
+'Hello ' + 'world' + '!'
+'Hello ' + str(1111)
+20 * 9/5 + 32
+print('Temp: 68.0 F')
+print('Temp: ' + 20 * 9/5 + 32 + ' F')
+print('Temp: ' + str(20 * 9/5 + 32) + ' F')
+# Lecture3VariablesAndConditions.py
+two_to_the_eighth = 2 ** 8
+print(two_to_the_eighth)
+two_to_the_eighth * 2
+pay_per_hour = 18
+pay_per_hour = 20  # Pay raise!
+print(pay_per_hour)
+counter = 0
+counter = counter + 1 # It's an instruction, not an equality!
+print(counter)
+counter = counter + 1
+print(counter)
+pay_per_hour = 20
+hours = 40
+total_pay = pay_per_hour * hours
+print(total_pay)
+Pay_Per_Hour = 15   # please avoid this capitalization style!
+print(pay_per_hour) # remembers the lowercase value
+silent_assignment = 0
+20 = pay_per_hour
+print(undefined_var + 7)
+color = input('What is your favorite color? ')
+print('Yeah, ' + color + ' is pretty great!')
+to_square_str = input('What should I square? ')
+print(int(to_square_str) ** 2)
+city = input('What city are we in? ')
+print(city == 'Boston')
+answer = input('What is 2+2? ')
+print(answer == 4) # not going to work
+answer == '4'   # but this works
+int(answer) == 4 # or this
+float(answer) == 4 # or even this
+print(1 < 1)
+print(1 > 1)
+print(1 != 1)
+print(1 <= 1)
+print(1 >= 1)
+print('aardvark' < 'zebra')
+print('capitalized' == 'Capitalized')
+2 + 5 > 7 - 4  # 5 > 7 would be false, but (2+5) > (7-4) is True
+total = 0
+value_str = input('Enter a value: ')
+value_int = int(value_str)
+if value_int < 0:
+    print('Sorry, that was a negative value.')
+else:
+    total = total + value_int
+print(total)
+if condition:
+    statement_if_true1
+    statement_if_true2
+    statement_if_true3
+    ...
+else:
+    statement_if_false1
+    statement_if_false2
+    ...
+statement_regardless1
+statement_regardless2
+...
+value = int(input('Enter an integer:'))
+if value < 0:
+    print('Negative')
+else:
+    print('Positive')
+print('Done')
+password = input('Enter the password: ')
+if password == '1234':
+    print('Correct!')
+    print('Your account has $1000000 in it.')
+else:
+    print('Incorrect.')
+print('Have a nice day.')
+num1_str = input('Enter an integer: ')
+num2_str = input('Enter a different integer: ')
+num1_int = int(num1_str)
+num2_int = int(num2_str)
+if num1_str == num2_str:
+    print('The numbers were supposed to be different...')
+    print('But you entered ' + num1_str + ' twice!')
+else:
+    print(num2_str + ' divided by ' + num1_str + ' is...')
+    print(num2_int / num1_int)  # Divide by zero would be error, btw
+print('Done...')
+language = input('What is your favorite language? ')
+if language == 'Python':
+    print('Mine too!')
+print('But there sure are a lot of languages out there....')
+value = int(input('Enter an integer between 0 and 100: '))
+if value < 0:
+    print('No negative numbers!')
+elif value > 100:
+    print('That value is too large!')
+elif value == 42:
+    print('That was the number I was thinking of!')
+else:
+    print('Guess again.')
+value = int(input('Enter an integer between 0 and 100: '))
+if value < 0:
+    print('No negative numbers!')
+elif value > 100:
+    print('That value is too large!')
+elif value >= 50:
+    print('Big!')
+else:
+    print('Small!')
+value = int(input('Enter an integer between 0 and 100: '))
+if value < 0:
+    print('No negative numbers!')
+else:
+    if value > 100:
+        print('That value is too large!')
+    else:
+        if value >= 50:
+            print('Big!')
+        else:
+            print('Small!')
+age = int(input('Enter your age: '))
+if age < 18:
+    if age < 5:
+        print('Just a toddler, then.')
+    elif age < 12:
+        print('Not quite a teenager, then.')
+    else:
+        print('Teenage years ... a difficult time!')
+else:
+    print('An adult, then.')
+    if age >= 55:
+        print('And a senior citizen, too!')
+num1 = int(input('First number: '))
+num2 = int(input('Second number: '))
+num3 = int(input('Third number: '))
+my_max = max(num1, num2, num3)
+my_min = min(num1, num2, num3)
+my_mean = (num1+num2+num3)/3  # Note importance of parens!
+print('Min: ' + str(my_min))
+print('Max: ' + str(my_max))
+print('Mean: ' + str(my_mean))
+if num1 == num2:
+    print(str(num1) + ' was repeated')
+elif num2 == num3:
+    print(str(num2) + ' was repeated')
+elif num1 == num3:
+    print(str(num3) + ' was repeated')
+else:
+    print('The numbers were unique')
+# Lecture4WhileAndLists.py
+string = input('Enter a number: ')
+while string != 'stop':
+    print(string + ' squared is ' + str(int(string) ** 2))
+    string = input('Enter a number: ')
+print('Done.')
+counter = 0
+while counter < 21:
+    print(counter)
+    counter = counter + 1
+print(counter)
+counter = 1
+print('We will now iterate three times...')
+while counter < 4:
+    print('Iteration ' + str(counter))
+    counter = counter + 1
+total = 0
+count = 0
+value_str = input('Enter a number, or "done" if done: ')
+while value_str != 'done':
+    count = count + 1
+    value_int = int(value_str)
+    total = total + value_int
+    value_str = input('Enter a number, or "done" if done: ')
+if count > 0:
+    print('The average is ' + str(total/count))
+total = 0
+count = 0
+value_str = input('Enter a number, or "done" if done: ')
+while value_str != 'done':
+    count += 1
+    value_int = int(value_str)
+    total += value_int
+    value_str = input('Enter a number, or "done" if done: ')
+if count > 0:
+    print('The average is ' + str(total/count))
+while(True):
+    input('Enter any input to get a compliment: ')
+    print('That is so clever of you!')
+my_list = ['duck', 'duck', 'goose']  # A list with 3 items
+print(my_list[0])
+print(my_list[1])
+print(my_list[2])
+my_list = ['duck', 'duck', 'goose']
+my_list[2] = 'bear'
+print(my_list)
+my_list = [1, 2, 3]
+my_list.append(4)
+print(my_list)  # my_list has changed...
+print(my_list.append(5))
+print(my_list)
+shopping_list = []
+item = input('Add an item to the shopping list (or "done"): ')
+while item.lower() != 'done':
+    shopping_list.append(item)
+    item = input('Add an item to the shopping list (or "done"): ')
+print('Okay, so that was: ')
+print(shopping_list)
+[1, 2, 3] + [4, 5, 6]
+print(len('Hello'))
+print(len([1, 2, 3]))
+my_items = ['eggs', 'flour', 'milk']
+print(len(my_items), 'items')
+print(my_items[2])
+print(my_items[len(my_items)-1])
+planet_diameter_km = [4879, 12104, 12756, 6792, 142984, 120536, 51118, 49528, 2377]
+planet_diameter_km.sort()
+planet_diameter_km
+my_list1 = [3, 2, 1]
+my_list2 = my_list1
+my_list1.sort()
+print(my_list1)
+print(my_list2)
+my_list1 = [3, 2, 1]
+my_list2 = my_list1.copy()
+my_list1.sort()
+print(my_list1)
+print(my_list2)
+honors = ['Albert', 'Berenice', 'Chen', 'Dominique']
+mentioned_honors = []
+nonhonors = []
+student = input('Enter a name (or "done"): ')
+while (student != 'done'):
+    if student in honors:
+        print('Honors!')
+        mentioned_honors.append(student)
+    else:
+        print('Not honors...')
+        nonhonors.append(student)
+    student = input('Enter a name (or "done"): ')
+print('Honors mentioned: ' + str(mentioned_honors))
+print('Nonhonors mentioned: ' + str(nonhonors))
+# Lecture5MorePower.py
+percent = input('Enter a percentage between 0 and 100:')
+if float(percent) >= 0 and float(percent) <= 100:
+    if float(percent) >= 10:
+        print('A decent return on investment....')
+    else:
+        print('Not a great return on investment....')
+else:
+    print('That is not in the requested range!')
+vip = False
+spent = 10
+if vip or spent >= 10000:
+    print('Send this person a loyalty reward!')
+else:
+    print('This person deserves nothing!')
+vip = False
+if not vip:
+    print('Have you considered signing up to join the VIP program?')
+else:
+    print('Welcome back, VIP customer!')
+vip = False
+spent = 0
+if not vip or spent < 10000:  # "not" applied to vip before "or"
+    print('Please spend more')
+else:
+    print('Hello, valued patron!')
+vip = False
+spent = 0
+if not (vip or spent < 10000): # within parens evaluates to True
+    print('Please spend more')
+else:
+    print('Hello, valued patron!')
+my_list = [1,2,3]
+my_list2 = [7,8,9]
+if not 4 in my_list and not 4 in my_list2:
+    print('No 4 found')
+my_list = [1,2,3]
+my_list2 = [7,8,9]
+if 4 not in my_list and not in my_list2:
+    print('This will actually cause an error - not how "in" works')
+import math
+math.sqrt(2)
+import math as m
+m.sqrt(2)
+from math import sqrt as my_sqrt
+my_sqrt(2)
+get_ipython().system('python3 -m ensurepip --upgrade')
+get_ipython().system('pip install seaborn')
+import seaborn as sns
+df = sns.load_dataset("penguins") # Load a dataset about penguins
+sns.jointplot(data=df, x="flipper_length_mm", y="bill_length_mm", hue="species")
+import statistics
+statistics.median([1, 2, 3, 4])
+import statistics
+statistics.median([1, 2, 3, 4])
+total = 0
+count = 0
+value_str = input('Enter a number, or "done" if done: ')
+while value_str != 'done':
+    count = count + 1
+    value_int = int(value_str)
+    total = total + value_int
+    value_str = input('Enter a number, or "done" if done: ')
+if count > 0:
+    print('The average is ' + str(total/count))
+total = 0
+count = 0
+value_str = input('Enter a non-negative integer, or "done" if done: ')
+while value_str != 'done':
+    if not value_str.isdigit():
+        print('Non-negative integers only!')
+    else:
+        count = count + 1
+        value_int = int(value_str)
+        total = total + value_int
+    value_str = input('Enter a non-negative integer, or "done" if done: ')
+if count > 0:
+    print('The average is ' + str(total/count))
+total = 0
+count = 0
+value_str = input('Enter a number, or "done" if done: ')
+while value_str != 'done':
+    count = count + 1
+    value_int = int(value_str)
+    total = total + value_int
+    print(value_str)
+if count > 0:
+    print('The average is ' + str(total/count))
+3 = my_list
+total = 0
+count = 0
+value_str = input('Enter a number, or "done" if done: ')
+count = count + 1
+value_int = int(value_str)
+total = total + value_int
+if count > 0:
+    print('The average is ' + str(total/count))
+# Lecture6and7Iteration.py
+people = ['Alice', 'Bob', 'Che']
+index = 0
+while index < len(people):
+    person = people[index]
+    print('Hooray for ' + person + '!')
+    index += 1
+people = ['Alice', 'Bob', 'Che']
+for person in people:
+    print('Hooray for ' + person + '!')
+running_total = 0
+numbers = [1,2,3,4,10]
+for n in numbers:
+    running_total = running_total + n  # Could be abbreviated running_total += n
+    print('Sum so far: ' + str(running_total))
+print('Sum: ' + str(running_total))
+my_grades = [4, 3, 2, 3, 4]
+letter_grades = []
+for g in my_grades:
+    if g == 4:
+        letter_grades.append('A')
+    elif g == 3:
+        letter_grades.append('B')
+    elif g == 2:
+        letter_grades.append('C')
+print(letter_grades)
+temps_f = [36, 39, 45, 56, 66, 76, 81, 80, 72, 61, 51, 41] # Jan through Dec
+temps_c = []
+for t in temps_f:
+    degrees_c = (t - 32)*5/9
+    temps_c.append(round(degrees_c, 2)) # Round to 2 decimal places
+temps_c
+my_car = ("Honda Fit", 2010, 30, 10000)
+print(my_car)
+car_type, year, mpg, price = my_car
+print(mpg)
+print(my_car[0] + ' prints successfully')  # OK
+my_car[0] = 'bad value' # Not OK, trying to change the tuple
+my_movies = [("No", 4), ("Rogue One", 4.5), ("Casablanca", 5)]
+for moviename, stars in my_movies:  # Notice the two variable names
+    print ('I would rate ' + moviename + ' ' + str(stars) + ' stars')
+my_movies = [("No", 4), ("Rogue One", 4.5), ("Casablanca", 5)]
+best_rating = 0 # Initialize with a value that is definitely beat
+best_movie = "none"
+for movie, rating in my_movies:
+    if rating > best_rating:
+        best_rating = rating
+        best_movie = movie
+print("Best movie: " + best_movie + "...rating = " + str(best_rating))
+movies = ['Fall Guy', 'Free Guy', 'Cable Guy']
+ratings = [5, 4, 3]
+for movie, rating in zip(movies, ratings):
+    print("I'd rate " + movie + " a " + str(rating))
+sw_movies = [('The Phantom Menace', 52),
+('Attack of the Clones', 65),
+('Revenge of the Sith', 80),
+('Rogue One', 84),
+('Solo', 70),
+('Star Wars', 92),
+('The Empire Strikes Back',94),
+('Return of the Jedi', 82),
+('The Force Awakens', 93),
+('The Last Jedi', 90),
+('The Rise of Skywalker', 51)]
+my_list = []
+for movie, score in sw_movies:
+  if score >= 80:
+    my_list.append(movie)
+print(my_list)
+for i in range(5):
+  print ("Iteration " + str(i))
+for i in range(1,6):
+    print(i)
+my_itinerary = ['Boston', 'Atlanta', 'LA', 'Seattle']
+for idx in range(len(my_itinerary)-1):  # Avoid indexing out of bounds
+    print(my_itinerary[idx] + '-' + my_itinerary[idx+1])
+names = ['Alice', 'Bob', 'Charlie', 'Dora']
+for number, name in enumerate(names):
+    print(name + ' ' + str(number))
+for movie, rating in sw_movies:
+    print('Looking at ' + movie)
+    if movie == 'Rogue One':
+        print('The rating of Rogue One is ' + str(rating))
+        break  # We don't need to look at any other entries
+print('Done')
+my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2],
+                         [100.2, 99.9, 100.0, 103.1]]
+my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2],
+                         [100.2, 99.9, 100.0, 103.1]]
+my_two_stock_histories[1]
+my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2],
+                         [100.2, 99.9, 100.0, 103.1]]
+my_two_stock_histories[1][2]
+my_stock_histories = my_two_stock_histories.copy()
+my_stock_histories.append([5.0, 9.0, 6.0, 7.0])
+print(my_stock_histories)
+print('Stock 0 closing prices: ')
+for price in my_stock_histories[0]:
+    print(price)
+print('Starting prices for all stocks:')
+for stock_list in my_stock_histories:
+    print(stock_list[0])
+letters = ['a', 'b', 'c','d','e','f','g','h','i','j']
+print('All possible coordinates in Battleship:')
+for l in letters:
+    for n in range(1,11):
+        print(l + str(n))
+bills = [[1, 2, 3], [4,5,6], [7,8,9]]
+my_totals = [] # empty list
+for l in bills:
+  print('new list')
+  listsum = 0
+  for l2 in l: # iterating over the list we got from the outer foreach
+    print('adding ' + str(l2))
+    listsum += l2
+  my_totals.append(listsum)
+print('Bill sums:' + str(my_totals))
+print('Possible matchups:')
+players = ['Alice', 'Bobby', 'Caspar', 'Dmitri', 'Eve']
+for white_player in players:
+  for black_player in players:
+    print("White: " + white_player + "; Black player: " + black_player)
+print('Possible matchups:')
+players = ['Alice', 'Bobby', 'Caspar', 'Dmitri', 'Eve']
+for white_player in players:
+  for black_player in players:
+    if not white_player == black_player:
+        print("White: " + white_player + "; Black player: " + black_player)
+my_multiples_of_3 = [v * 3 for v in range(5)]
+my_multiples_of_3
+unrounded = [1.9, 5.3, 9.9]
+rounded  = [round(i,0) for i in unrounded]
+rounded
+unrounded = [1.9, 5.3, 9.9]
+rounded = []
+for item in unrounded:
+    rounded.append(round(item,0))
+print(rounded)
+temps_f = [36, 39, 45, 56, 66, 76, 81, 80, 72, 61, 51, 41] # Jan through Dec
+temps_c = [round((t-32)*5/9,2) for t in temps_f]
+temps_c
+times = [(2,30), (4,10), (1, 30), (0,40), (0, 20)]
+minutes = [t[0]*60 + t[1] for t in times]
+minutes
+# Lecture8and9Functions.py
+def add_an_s(string):
+    new_string = string + 's'
+    return new_string
+add_an_s('example') + '!'
+records = read_customer_data('input.csv')
+sales = 0
+purchase_counts = []
+s_names = []
+for record in records:
+    name, purchase_list, sale_info = parse_record(record)
+    s_names.append(standardize_name(name))
+    sales = update_total_sales(sales, sale_info)
+    update_purchase_counts(purchase_counts, purchase_list)
+write_to_file(s_names, purchase_counts, sales, 'output.csv')
+def add_two(my_number):
+  # Adds two to the argument.
+  return my_number + 2
+add_two(2)
+def count_matches(to_match, my_list):
+  # Counts how many times to_match appears in my_list
+  count = 0
+  for m in my_list:
+    if to_match == m:
+      count += 1
+  return count
+print(count_matches(5, [5, 6, 7, 5]))
+print(count_matches("foo", ["foo","bar","baz"]))
+def percent_gain(start, finish):
+    return (finish-start)/start * 100
+print(percent_gain(36585.06, 33147.25))
+print(percent_gain(4796.56, 3839.50))
+print(percent_gain(15832.80, 10466.48))
+def get_rating(movie_tuple):
+    # More readable way to access a movie rating
+    return movie_tuple[1]
+get_rating(('Portrait of a Lady on Fire', 5))
+def with_tax(price, tax):
+    return round(price * (1 + tax * .01), 2)
+with_tax(1,8.6)
+from datetime import date
+def greet_user():
+  print("Hello, user!")
+  print("Today's date is " + str(date.today()))
+greet_user()
+def greet_user():
+  print("Hello, user!")
+  print("Today's date is " + str(date.today()))
+  return
+print(greet_user())
+def longest_customer_name(list_of_names):
+    # Find the longest customer name, and how long it is
+    # (maybe so we can display the names nicely later)
+    longest_len = 0
+    longest_name = ""
+    for n in list_of_names:
+        if len(n) > longest_len:
+            longest_len = len(n)
+            longest_name = n
+    return longest_name, longest_len
+name, length = longest_customer_name(['Alice', 'Bob', 'Cassia'])
+print(name)
+print(length)
+from statistics import mean
+def min_mean_max(L):
+    return min(L), mean(L), max(L)
+min_mean_max([1,2,3,4,5])
+def count_items(lst):
+    # Count items but warn if the list is empty
+    if (len(lst) == 0):
+        print('Warning: empty list passed to count_items!')
+        return 0
+    print("We don't get here with an empty list")
+    return len(lst)
+count_items([])
+def is_prime(n):
+    for i in range(2, n): # Look for a divisor
+        if n % i == 0:    # i divides n evenly, no remainder
+            return False
+    return True           # didn't find a divisor
+print(is_prime(11))
+print(is_prime(4))
+def longest_customer_name(list_of_names):
+    # Find the longest customer name, and how long it is
+    # (maybe so we can display the names nicely later)
+    longest_len = 0
+    longest_name = ""
+    for n in list_of_names:
+        if len(n) > longest_len:
+            longest_len = len(n)
+            longest_name = n
+    return longest_name, longest_len
+def count_matches(to_match, my_list):
+  # Counts how many times to_match appears in my_list
+  count = 0
+  for m in my_list:
+    if to_match == m:
+      count += 1
+  return count
+def count_longest_name(list_of_names):
+    # Count how many times the longest name appears in the list
+    # Makes use of functions defined above
+    word, length = longest_customer_name(list_of_names)
+    return count_matches(word,list_of_names)
+count_longest_name(['Alice','Bob','Catherine','Catherine'])
+def all_names_short_enough1(names, limit):
+    for name in names:
+        if len(name) > limit:
+            return False
+    return True
+print(all_names_short_enough1(['Alice', 'Bob'], 3))
+print(all_names_short_enough1(['Alice', 'Bob'], 5))
+def all_names_short_enough2(names, limit):
+    name, length = longest_customer_name(names)
+    return length <= limit
+print(all_names_short_enough2(['Alice', 'Bob'], 3))
+print(all_names_short_enough2(['Alice', 'Bob'], 5))
+def add5(arg):
+    b = arg + 5
+    return b
+add5(7) # Return 12
+def pattern_a(price, tax):
+  return price * (1 + 0.01 * tax)  # Everything we need is in the arguments - good
+tax = 20 # Global variable - this is worse style
+def pattern_b(price):
+  return price * (1 + 0.01 * tax) # Works, but less flexible, hard to debug
+print(pattern_a(100,20))
+print(pattern_b(100))
+def add_two(my_number):
+  a = my_number + 2 # Shadows outer "a", now we have two a's and see this one
+  print("a is " + str(a) + " inside add_two")
+  return a
+a = 5
+print("add_two(2) is " + str(add_two(2)))
+print("a is " + str(a) + " outside add_two")
+my_list = ['a','b','c']
+def concatenate_all(my_list):
+    out = ''
+    for item in my_list:
+        out += item
+    return out
+print(concatenate_all(['d','e'])) # ['d','e'] is called my_list in the function
+print(concatenate_all(my_list))  # my_list is still a,b,c
+names = ["Catherine", "Donovan", "alice", "BOB"]
+standardized_names = []
+for name in names:
+    name = name.capitalize() # Capitalize first letter, lc others
+    standardized_names.append(name)
+    standardized_names.sort()
+jobs = ['Pilot', 'teacheR', 'firefighter', 'LIBRARIAN']
+standardized_jobs = []
+for job in jobs:
+    job = job.capitalize()
+    standardized_jobs.append(job)
+    standardized_jobs.sort()
+print(standardized_names)
+print(standardized_jobs)
+names = ["Catherine", "Donovan", "alice", "BOB"]
+jobs = ['Pilot', 'teacheR', 'firefighter', 'LIBRARIAN']
+def standardize_strings(string_list):
+    out = []
+    for s in string_list:
+        s = s.capitalize()
+        out.append(s)
+    out.sort()
+    return out
+standard_names = standardize_strings(names)
+standard_jobs = standardize_strings(jobs)
+print(standard_names)
+print(standard_jobs)
+def get_first_letter(word):
+  """ Returns the first letter of a string.
+  word (str):  The string to get the letter from.
+  A simple function just for demo purposes.  Probably
+  not useful since get_first_letter takes more characters
+  to type than string[0].
+  """
+  return word[0]
+get_ipython().run_line_magic('pinfo', 'get_first_letter')
+print(get_first_letter("Shibboleth") == "S")
+print(pattern_a(100,20) == 120)
+print(pattern_a(0, 20) == 0)
+print(count_matches("A",[]) == 0)
+print(count_matches("A", ["A","A","A"]) == 3)
+# Lecture10Hashes.py
+my_menu_dict = {
+    "Salmon": 25,
+    "Steak": 30,
+    "Mac and cheese" : 18
+}
+print(my_menu_dict["Salmon"])
+my_menu_dict = {} # empty dictionary
+my_menu_dict["Salmon"] = 25
+my_menu_dict["Steak"] = 30
+my_menu_dict["Mac and cheese"] = 18
+print(my_menu_dict["Salmon"])
+my_dict = {}
+my_dict.get('sushi', 0)
+two_cities = """It was the best of times, it was the worst of times,
+ it was the age of wisdom, it was the age of foolishness, it was the epoch of belief,
+ it was the epoch of incredulity, it was the season of light, it was the season of darkness,
+ it was the spring of hope, it was the winter of despair."""
+worddict = {}
+wordlist = two_cities.split()
+for word in wordlist:
+  if word in worddict:  # Check for presence of key
+    worddict[word] += 1
+  else:
+    worddict[word] = 1
+print(worddict["age"])
+print(worddict["of"])
+for word, count in worddict.items():
+  print(word + ":" + str(count))
+def word_prob(word, worddict):
+    numerator = worddict.get(word, 0)
+    denominator = 0
+    for word, count in worddict.items():
+        denominator += count
+    return numerator / denominator
+print(word_prob('winter', worddict))  # Should be 1/60 = 0.0167 or so
+print(word_prob('season', worddict))  # Should be 2/60 = 0.0333 or so
+print(word_prob('Pokemon', worddict))  # Should be 0 with no errors
+bigIPs = {"209.85.231.104", "207.46.170.123", "72.30.2.43"}
+bigIPs.add("208.80.152.2")
+len(bigIPs)
+newset = set()
+newset.add("First item")
+print("First item" in newset)
+myset = set(range(123456789))   # {0, 1, 2, ...}
+mylist = list(range(123456789)) # [0, 1, 2, ...]
+12345678 in myset  # Fast, uses hash
+12345678 in mylist # Slower, check each item
+two_cities_extended = """It was the best of times,
+it was the worst of times, it was the age of wisdom,
+it was the age of foolishness, it was the epoch of belief,
+it was the epoch of incredulity, it was the season of Light,
+it was the season of Darkness, it was the spring of hope,
+it was the winter of despair, we had everything before us,
+we had nothing before us, we were all going direct to Heaven,
+we were all going direct the other way--in short, the period was
+so far like the present period that some of its noisiest authorities
+insisted on its being received, for good or for evil, in the superlative
+degree of comparison only.
+There were a king with a large jaw and a queen with a plain face,
+on the throne of England; there were a king with a large jaw and a
+queen with a fair face, on the throne of France. In both countries
+it was clearer than crystal to the lords of the State preserves of
+loaves and fishes, that things in general were settled for ever.
+It was the year of Our Lord one thousand seven hundred and seventy-five.
+Spiritual revelations were conceded to England at that favoured period,
+as at this. Mrs. Southcott had recently attained her five-and-twentieth
+blessed birthday, of whom a prophetic private in the Life Guards had heralded
+the sublime appearance by announcing that arrangements were made for the
+swallowing up of London and Westminster. Even the Cock-lane ghost had been
+laid only a round dozen of years, after rapping out its messages, as the
+spirits of this very year last past (supernaturally deficient in originality)
+rapped out theirs. Mere messages in the earthly order of events had lately
+come to the English Crown and People, from a congress of British subjects
+in America: which, strange to relate, have proved more important to the human
+race than any communications yet received through any of the chickens of the
+Cock-lane brood.
+"""
+wordlist = two_cities_extended.split()
+def find_by_list(wordlist):
+  for word in wordlist:
+    if word in wordlist:
+        continue # Move on to next loop
+get_ipython().run_line_magic('time', 'find_by_list(wordlist)')
+worddict = {}
+for word in wordlist:
+  if word in worddict:
+    worddict[word] += 1
+  else:
+    worddict[word] = 1
+def find_by_dict(wordlist, dict):
+  for word in wordlist:
+    if word in dict:
+      continue # Move on to next iteration of the for loop
+get_ipython().run_line_magic('time', 'find_by_dict(wordlist,worddict)')
+mydict = {"a":1000}
+dict2 = mydict # gets the address, so any changes are permanent to the original
+dict2["b"] = 500
+print(mydict)
+print(dict2)
+dict3 = dict2.copy()
+dict3["c"] = 40
+print(dict2)
+print(dict3)
+from string import ascii_lowercase
+myset = set()
+for i in range(len(two_cities_extended)):
+  myset.add(two_cities_extended[i].lower())
+def checkletters(myset):
+  for c in ascii_lowercase:
+    # TODO check whether this letter appeared in myset, maybe return a value
+    if c not in myset:
+      print("Missing: " + c)
+      return False
+  print("All found")
+  return True
+checkletters(myset)
+# Lecture11and12NumpyMatplotlib.py
+import numpy as np
+v = np.array([1, 2 ,3])
+print(v)
+A = np.array([[1, 0, 0],
+              [0 ,2, 0],
+              [0, 0, 3]])  # 3x3 with 1,2,3 along the diagonal
+print(A)
+print(A.shape)  # Tuples: like lists, but use () instead of []
+print(v.shape)  # 1d outputs a comma to indicate it's still a tuple
+v1 = v
+print(v1)
+v2 = np.array([4, 5, 6])
+print(v2)
+print("Adding 1D arrays: ",  v1 + v2)
+print("Subtracting 1D arrays: ",  v1 - v2)
+print("Multiplying 1D arrays: ", v1 * v2)
+print("Dividing 1D arrays: ", v1 / v2)
+print(v1)
+print("Adding by a constant: ", v1 + 2)
+print("Subtracting by a constant: ", v1 - 2)
+print("Multiplying by a constant: ", v1 * 2)
+print("Dividing by a constant: ", v1 / 2)
+my_array = np.array([[1,2,3],
+                     [4,5,6]])
+print(np.min(my_array, axis=0))
+print(np.mean(my_array, axis=1))
+B = np.array([[3, 2],
+              [4, -1]])
+w = np.array([1, -1])
+z = B @ w
+print(z)
+my_array = np.array([8, 6, 7, 5, 3, 0, 9])
+print(my_array[1:3]) # prints index 1 and 2, not 3
+print(my_array)
+print(my_array[1:])
+my_array[:3]
+my_matrix = np.array([[42.3, 71.1, 92],
+                      [40.7, 70.0, 85],
+                      [47.6, 122.0, 82]])
+print(my_matrix)
+two_by_two_square = my_matrix[1:, :2]
+print(two_by_two_square)
+no_last_column = my_matrix[:, :2] # no temperature
+print(no_last_column)
+import numpy as np
+a = np.array([0, 1, 2, 3, 4, 5])
+print(a)
+b = a[1:3]
+print(b)
+b[1] = 100 # modify the slice...
+print(a) # ...and see the original change
+print(np.zeros(3)) #create an array of zeros with length 3
+print(np.zeros((2, 3))) # create a 2x3 matrix of zeros
+import matplotlib.pyplot as plt
+x = [1, 2, 3]
+y = [1, 4, 9]
+plt.plot(x, y)
+plt.show()
+import numpy as np
+my_points = np.array([[2, 1],
+                      [3, 4],
+                      [5, 6]]) # Each list is a point
+print(my_points)
+plt.plot(my_points[:, 0], my_points[:,1])    # Slice to get x values separate from y values
+plt.show()
+plt.plot(my_points[:, 0], my_points[:, 1], 'ro') # 'r' is for red, 'o' is for circles
+plt.show()
+distances_millions_miles = [35, 67, 93, 142, 484, 889, 1790, 2880]
+plt.plot(np.arange(1, 9), distances_millions_miles, 'o')
+plt.show()
+np.arange(1,9)
+xpoints = np.linspace(0, 10, 100)
+ypoints = xpoints ** 2 + 1
+plt.plot(xpoints, ypoints)
+plt.show()
+plt.plot(my_points[:, 0], my_points[:, 1], 'ro')
+myfit_x = np.linspace(1, 5, 100)
+myfit_y = np.linspace(1.5, 5.5, 100) # Same y/x slope for all segments - so, a line
+plt.plot(myfit_x,myfit_y)
+plt.show()
+import matplotlib.pyplot as plt
+x = [1, 2, 3]
+y1 = [1, 2, 3]
+y2 = [3, 2, 1]
+plt.plot(x, y1, label='Sales')
+plt.plot(x, y2, label='Quality')
+plt.legend()
+plt.title('Trends')
+plt.grid(True)
+customers = ['Oliver', 'Sophia', 'Liam', 'Arielle', 'Noah']
+total_purchases = [56, 73, 24, 48, 88]
+plt.bar(customers, total_purchases)
+plt.xlabel("Customer name", fontsize=14)
+plt.ylabel("Total purchases", fontsize=14)
+plt.title("Total purchases for 5 Amazon customers", fontsize=16)
+plt.tick_params(axis='x', labelsize=14)
+plt.tick_params(axis='y', labelsize=14)
+plt.show()
+# Lecture13BiggerPrograms.py
+    """
+    Compute f-measure for each item in a list.
+    Argument: stats_list (list):  a list of tuples of four ints, (tp, fp, tn, fn)
+               (these stand for true positive, false positive, etc)
+    Returns:  a list of floats, the f-measures.
+    """
+    """
+    Compute the f-measure, a performance measure that ignores true negatives.
+    Arguments:  tp (int):  the count of true positives
+                fp (int):  the count of false positives
+                tn (int):  the count of true negatives
+                fn (int):  the count of false negatives
+    Returns: a float, the f-measure.
+    """
+def f_measures(stats_list):
+    """
+    Compute f-measure for each item in a list.
+    Argument: stats_list (list):  a list of tuples of four ints, (tp, fp, tn, fn)
+               (these stand for true positive, false positive, etc)
+    Returns:  a list of floats, the f-measures.
+    """
+    # NOTE(review): skeleton version - each f-measure is computed but never
+    # collected or returned, so this function currently returns None.
+    for tp, fp, tn, fn in stats_list:
+        f = f_measure(tp, fp, tn, fn)
+def f_measures(stats_list):
+    """
+    Compute f-measure for each item in a list.
+    Argument: stats_list (list):  a list of tuples of four ints, (tp, fp, tn, fn)
+               (these stand for true positive, false positive, etc)
+    Returns:  a list of floats, the f-measures.
+    """
+    out = []
+    for tp, fp, tn, fn in stats_list:
+        f = f_measure(tp, fp, tn, fn)
+        out.append(f)
+    return f
+def f_measure(tp, fp, tn, fn):
+    """
+    Compute the f-measure, a performance measure that ignores true negatives.
+    Arguments:  tp (int):  the count of true positives
+                fp (int):  the count of false positives
+                tn (int):  the count of true negatives (accepted but not used)
+                fn (int):  the count of false negatives
+    Returns: a float, the f-measure.
+    """
+    # Precision: of everything labeled positive, the share that really was
+    precision = tp/(tp + fp)
+    # Recall: of everything really positive, the share that was found
+    recall = tp/(tp + fn)
+    return (2 * precision * recall)/(precision + recall)
+def f_measure(precision, recall):
+    """
+    Compute the f-measure, a performance measure that ignores true negatives.
+    Arguments:  precision (float):  proportion of positive classifications that are correct
+                recall (float):  proportion of positive examples that were found
+    Returns: a float, the f-measure.
+    """
+    return (2 * precision * recall)/(precision + recall)
+def precision(tp, fp):
+    return tp/(tp + fp)
+def recall(tp, fn):
+    tp/(tp + fn)
+def f_measures(stats_list):
+    """
+    Compute f-measure for each item in a list.
+    Argument: stats_list (list):  a list of tuples of four ints, (tp, fp, tn, fn)
+               (these stand for true positive, false positive, etc)
+    Returns:  a list of floats, the f-measures.
+    """
+    out = []
+    for tp, fp, tn, fn in stats_list:
+        f = f_measure(precision(tp, fp), recall(tp, fn))
+        out.append(f)
+    return f
+print(precision(4,4)) # Expect 0.5
+print(recall(4,4)) # Expect 0.5
+print(f_measure(1, 1)) # Expect 1
+def recall(tp, fn):
+    # Debugging version: prints the recall instead of returning it,
+    # so callers receive None.
+    print(tp/(tp + fn))
+recall(4,4)
+def recall(tp, fn):
+    print(tp/(tp + fn))
+    return tp/(tp + fn)
+recall(4,4)
+def f_measure(precision, recall):
+    """
+    Compute the f-measure, a performance measure that ignores true negatives.
+    Arguments:  precision (float):  proportion of positive classifications that are correct
+                recall (float):  proportion of positive examples that were found
+    Returns: a float, the f-measure.
+    """
+    return (2 * precision * recall)/(precision + recall)
+def precision(tp, fp):
+    return tp/(tp + fp)
+def recall(tp, fn):
+    return tp/(tp + fn)
+def f_measures(stats_list):
+    """
+    Compute f-measure for each item in a list.
+    Argument: stats_list (list):  a list of tuples of four ints, (tp, fp, tn, fn)
+               (these stand for true positive, false positive, etc)
+    Returns:  a list of floats, the f-measures.
+    """
+    out = []
+    for tp, fp, tn, fn in stats_list:
+        f = f_measure(precision(tp, fp), recall(tp, fn))
+        out.append(f)
+    return f
+print(precision(4,4)) # Expect 0.5
+print(recall(4,4)) # Expect 0.5
+print(f_measure(1, 1)) # Expect 1
+print(precision(0, 4)) # Expect 0
+print(precision(0, 0)) # Expect ... oh, I guess we didn't think about this.  0?
+print(precision(4, 0)) # Expect 1
+print(recall(0, 4)) # Expect 0
+print(recall(0, 0)) # Similarly to precision, let's return 0
+print(recall(4, 0)) # Expect 1
+print(f_measure(0, 0)) # Expect 0
+print(f_measure(0.5, 0.5)) # Expect 0.5
+def f_measure(precision, recall):
+    """
+    Compute the f-measure, a performance measure that ignores true negatives.
+    Arguments:  precision (float):  proportion of positive classifications that are correct
+                recall (float):  proportion of positive examples that were found
+    Returns: a float, the f-measure.
+    """
+    return (2 * precision * recall)/(precision + recall)
+def precision(tp, fp):
+    if tp + fp == 0:
+        return 0
+    return tp/(tp + fp)
+def recall(tp, fn):
+    if tp + fn == 0:
+        return 0
+    return tp/(tp + fn)
+def f_measures(stats_list):
+    """
+    Compute f-measure for each item in a list.
+    Argument: stats_list (list):  a list of tuples of four ints, (tp, fp, tn, fn)
+               (these stand for true positive, false positive, etc)
+    Returns:  a list of floats, the f-measures.
+    """
+    out = []
+    for tp, fp, tn, fn in stats_list:
+        f = f_measure(precision(tp, fp), recall(tp, fn))
+        out.append(f)
+    return f
+print(precision(4,4)) # Expect 0.5
+print(recall(4,4)) # Expect 0.5
+print(f_measure(1, 1)) # Expect 1
+print(precision(0, 4)) # Expect 0
+print(precision(0, 0)) # Expect 0
+print(precision(4, 0)) # Expect 1
+print(recall(0, 4)) # Expect 0
+print(recall(0, 0)) # Similarly to precision, let's return 0
+print(recall(4, 0)) # Expect 1
+print(f_measure(0, 0)) # Expect 0
+print(f_measure(0.5, 0.5)) # Expect 0.5
+def f_measure(precision, recall):
+    """
+    Compute the f-measure, a performance measure that ignores true negatives.
+    Arguments:  precision (float):  proportion of positive classifications that are correct
+                recall (float):  proportion of positive examples that were found
+    Returns: a float, the f-measure.
+    """
+    if precision + recall == 0:
+        return 0
+    return (2 * precision * recall)/(precision + recall)
+def precision(tp, fp):
+    if tp + fp == 0:
+        return 0
+    return tp/(tp + fp)
+def recall(tp, fn):
+    if tp + fn == 0:
+        return 0
+    return tp/(tp + fn)
+def f_measures(stats_list):
+    """
+    Compute f-measure for each item in a list.
+    Argument: stats_list (list):  a list of tuples of four ints, (tp, fp, tn, fn)
+               (these stand for true positive, false positive, etc)
+    Returns:  a list of floats, the f-measures.
+    """
+    out = []
+    for tp, fp, tn, fn in stats_list:
+        f = f_measure(precision(tp, fp), recall(tp, fn))
+        out.append(f)
+    return f
+print(precision(4,4)) # Expect 0.5
+print(recall(4,4)) # Expect 0.5
+print(f_measure(1, 1)) # Expect 1
+print(precision(0, 4)) # Expect 0
+print(precision(0, 0)) # Expect 0
+print(precision(4, 0)) # Expect 1
+print(recall(0, 4)) # Expect 0
+print(recall(0, 0)) # Similarly to precision, let's return 0
+print(recall(4, 0)) # Expect 1
+print(f_measure(0, 0)) # Expect 0
+print(f_measure(0.5, 0.5)) # Expect 0.5
+# Lecture14Pandas.py
+import pandas as pd
+import numpy as np
+s1 = pd.Series([-3, -1, 1, 3, 5])
+print(s1)
+print(s1.index)
+s1[:2] # First 2 elements
+print(s1[[2,1,0]])  # Elements out of order
+type(s1)
+s1[s1 > 0]
+s2 = pd.Series(np.random.rand(5), index=['a', 'b', 'c', 'd', 'e'])
+print(s2)
+print(s2.index)
+print(s2['a'])
+data = {'pi': 3.14159, 'e': 2.71828}  # dictionary
+print(data)
+s3 = pd.Series(data)
+print(s3)
+my_array = s3.values
+print(my_array)
+import numpy as np
+my_data = np.array([[5, 5, 4],
+                    [2, 3, 4]])
+hotels = pd.DataFrame(my_data, index = ["Alice rating", "Bob rating"],
+                   columns = ["Hilton", "Marriott", "Four Seasons"])
+hotels
+from google.colab import files
+uploaded = files.upload() # pick starbucks_drinkMenu_expanded.csv
+get_ipython().system('ls')
+import pandas as pd
+df = pd.read_csv('starbucks_drinkMenu_expanded.csv', index_col = 'Beverage')
+df.head()
+sorted_df = df.sort_values(by = "Calories", ascending=False)
+sorted_df.head()
+hotels
+hotels['Hilton']
+sum = 0
+for i in hotels['Hilton']:
+    sum += i
+print('Average Hilton Rating: ' + str(sum/len(hotels['Hilton'])))
+hotels.loc['Bob rating']
+hotels.loc['Bob rating', 'Marriott']
+hotels.iloc[1, 1]
+print(hotels.iloc[0, 1:2])
+print(hotels.loc['Bob rating', ['Marriott', 'Hilton']])
+(df['Calories'] > 300)
+df[df['Calories'] > 300].head()
+df[(df['Calories'] > 300) & (df['Beverage_prep'] == 'Soymilk')].head()
+df['bad_fat'] = df['Trans_Fat_g'] + df['Saturated_Fat_g']
+df.head()
+size_ounces_dict = {'Short': 8, 'Tall': 12, 'Grande': 16, 'Venti': 20}
+ounces_list = []
+for drink in df['Beverage_prep']:
+    ounces_list.append(size_ounces_dict.get(drink, -1))
+df['ounces'] = ounces_list
+df.head()
+def size_to_ml(size_name):
+    size_ounces_dict = {'Short': 8, 'Tall': 12, 'Grande': 16, 'Venti': 20}
+    return size_ounces_dict.get(size_name,0) * 29.5735
+ml = df['Beverage_prep'].map(size_to_ml)
+print(ml)
+# Lecture15Pandas.py
+import pandas as pd
+df = pd.read_csv('starbucks_drinkMenu_expanded.csv', index_col = 'Beverage')
+df.head()
+print(df.loc[:, "Protein_g"].mean())
+print(df.loc[:, "Protein_g"].max())
+print(df.loc[:, "Protein_g"].idxmax()) # "argmax," gives index with biggest value
+df.describe()
+df.corr(numeric_only=True)  # New to pandas 2.0.0: chokes on strings without added arg
+df.columns
+df.dtypes
+string = 'string'
+string[:-1]
+df['Vitamin_A'] = df['Vitamin_A'].str[0:-1] # Remove the % at the end
+df['Vitamin_A']
+df['Vitamin_A'] = pd.to_numeric(df['Vitamin_A'])
+df.dtypes
+df['Vitamin_A'] = df['Vitamin_A'].astype('float64')
+df.dtypes
+df.corr(numeric_only=True)
+df.isnull().sum()
+df = df.dropna(axis=0, how="any") # Remove the offending row
+df.isnull().sum()
+calorie_max = 0
+best_name = ""
+for index, row in df.iterrows():
+  if row['Calories'] > calorie_max:
+    calorie_max = row['Calories']
+    best_name = index
+print(best_name)
+protein = df.loc[:, "Protein_g"]
+protein.hist(bins=20); # Create a histogram with 20 equally spaced bins for the data
+subplot = df[["Protein_g", "Vitamin_A"]] # Notice another way to get desired columns
+subplot.boxplot(); # Boxplots give median value, middle 50% of data, and range of non-outliers
+from google.colab import files
+uploaded = files.upload() # pick titanic.csv
+df = pd.read_csv('titanic.csv', index_col = 'PassengerId')
+df.head()
+df.columns
+df.dtypes
+df.describe()
+df.corr(numeric_only=True)
+males = df[df['Sex'] == 'male']
+males.head()
+males.describe()
+females = df[df['Sex'] == 'female']
+females.describe()
+df['sex_numeric'] = df['Sex'] == 'female'
+df.corr(numeric_only=True)
+third_class = df[df['Pclass'] == 3]
+second_class = df[df['Pclass'] == 2]
+first_class = df[df['Pclass'] == 1]
+third_class['Survived'].hist();
+second_class['Survived'].hist();
+first_class['Survived'].hist();
+# Lecture16Strings.py
+my_cost = 12.95821
+print(f'The total cost was {my_cost} dollars')
+print(f'The total cost was {my_cost:.2f} dollars')
+groceries = "milk,eggs,yogurt"
+grocerieslist = groceries.split(',')
+print(grocerieslist)
+','.join(['milk', 'eggs', 'yogurt'])
+'     milk,eggs,yogurt     '.strip()
+lines = "SERVANT: Sir, there are ten thousand--\nMACBETH: Geese, villain?"
+linelist = lines.splitlines()  # A shortcut for split('\n')
+for line in linelist:
+  if line.startswith("MACBETH"):
+    print(line.split(": ")[1])
+print('Wow\n\twow!')
+print("foo" in "food")
+print("foodfood".replace("foo", "ra"))
+import numpy as np
+import pandas as pd
+my_data = np.array([["Excellent", "   Okay   ", "   Okay"], ["Great    ", "   Good", "   Good"]])
+df = pd.DataFrame(my_data, columns = ["Hilton", "Marriott", "Four Seasons"], index = ["Alice", "Bob"])
+df
+marriott = df['Marriott']
+for s in marriott:
+    print(s)
+print('---')
+for s in marriott.str.strip():
+    print(s)  # Look, no extra whitespace
+marriott.str.match("\s*Okay\s*")
+import re
+pattern = '02143'
+longstring = 'Somerville, MA 02143'
+result = re.search(pattern, longstring)
+if result:  # (if it's not None)
+    print(result.group())
+longstring = '0132428190214200'
+pattern2 = '02143'
+result2 = re.search(pattern2, longstring)
+print(result2)
+pattern3 = '\d\d\d\d\d'
+longstring = 'Somerville, MA 02143'
+result3 = re.search(pattern3, longstring)
+if result3:
+    print(result3.group())
+longstring = 'My phone number is 5555555'
+pattern4 = 'phone number is \d+'
+result4 = re.search(pattern4, longstring)
+if result4:
+    print(result4.group())
+longstring = 'Call me at 555-5555'
+pattern5 = '\d\d\d-?\d\d\d\d'
+result5 = re.search(pattern5, longstring)
+if result5:
+    print(result5.group())
+longstring = "Call me at 1-800-555-5555."
+pattern = "(\d-)?(\d\d\d-)?\d\d\d-?\d\d\d\d"
+result = re.search(pattern, longstring)
+if result:
+    print(result.group())
+longstring2 = "Call me at 555-5555."
+result = re.search(pattern, longstring2)
+if result:
+    print(result.group())
+pattern = "Somerville, (MA|NJ)"
+longstring = "Somerville, NJ 02143"
+result = re.search(pattern, longstring)
+if result:
+    print(result.group())
+longstring = "States with a Somerville:  AL, IN, ME, MA, NJ, OH, TN, TX"
+pattern = "[A-Z][A-Z]"  # Get capital letters within A-Z range
+result = re.findall(pattern, longstring)
+print(result)
+longstring = "The stock NVDA went down 4.54 points"
+pattern = "stock (\w+) went down (\d+\.\d+) points"
+result = re.search(pattern, longstring)
+if result:
+    print(result.group())
+    print(result.group(1))  # Subgroup 1, the first () in the pattern
+    print(result.group(2))
+import re
+longstring = "We paid $100 for those shoes"
+pattern = '\$\d+'
+result = re.search(pattern, longstring)
+print(result.group())
+# Lecture18Objects.py
+class Car:
+    # Empty class: it defines no attributes or methods of its own.
+    pass
+car1 = Car()
+car2 = Car()
+car3 = Car()
+print(isinstance(car1,Car))
+car1.year = 2010
+car1.make = "Honda"
+car1.model = "Fit"
+car1.color = "blue"
+car2.year = 2013
+car2.make = "Toyota"
+car2.model = "Camry"
+car2.color = "silver"
+print(f"This car is a {car1.year} {car1.color} {car1.make} {car1.model}")
+my_car = (2010, 'Honda', 'Fit', 'blue')
+print(f"This car is a {my_car[0]} {my_car[3]} {my_car[1]} {my_car[2]}")
+class Car:
+    def print_facts(self):
+        print(f"This car is a {self.year} {self.color} {self.make} {self.model}")
+car1 = Car()
+car2 = Car()
+car1.year = 2010
+car1.make = "Honda"
+car1.model = "Fit"
+car1.color = "blue"
+car2.year = 2013
+car2.make = "Toyota"
+car2.model = "Camry"
+car2.color = "silver"
+car1.print_facts()
+car2.print_facts()
+class Car:
+    def __init__(self, year, make, model, color):
+        # It's common for the constructor's arguments
+        # to have similar or identical names to the attributes they set
+        # (but we still have to say one should be set to the other)
+        self.year = year
+        self.make = make
+        self.model = model
+        self.color = color
+    def print_facts(self):
+        print(f"This car is a {self.year} {self.color} {self.make} {self.model}")
+car1 = Car(2010, "Honda", "Fit", "blue")
+car2 = Car(2013, "Toyota", "Camry", "silver")
+car1.print_facts()
+car2.print_facts()
+def newest_car(list_of_cars):
+    if not list_of_cars:  # ie, empty list
+        return None
+    best_year = list_of_cars[0].year
+    best_car = list_of_cars[0]
+    for car in list_of_cars:
+        # This warning message could prevent a bug if we try
+        # to hand this function the wrong list
+        if not isinstance(car, Car):
+            print('Warning, list had non-car items!')
+        elif car.year > best_year:
+            best_year = car.year
+            best_car = car
+    return best_car
+newest_car([car1, car2]).print_facts()
+class Bill:
+  """ Represents a bill at a restaurant.
+  _items (list of tuples):  list of (item name, cost) tuples
+  """
+  def __init__(self, items):
+    self._items = items
+  # "Getter"
+  def items(self):
+    return self._items
+  # "Setter"
+  def set_items(self, items):
+    self._items = items
+  def total_cost_pretax(self):
+    total = 0
+    for name, cost in self._items:
+      total += cost
+    return total
+  def total_cost_with_tax(self, tax_rate):
+    return round(self.total_cost_pretax() * (1 + tax_rate), 2)
+my_lunch = [("Ham Sandwich", 9), ("Coke", 2)]
+new_bill = Bill(my_lunch)
+cost_with_tax = new_bill.total_cost_with_tax(0.08)
+print(f"Total cost: {cost_with_tax}")
+new_bill.items() # could have said new_bill._items, but we were told not to
+class Bill:
+  """ Represents a bill at a restaurant.
+  _item_names (list of strings):  list of items on bill
+  _item_costs (list of ints): list of prices of items on bill
+  _items is not here anymore! sorry anybody who wrote code that uses it, we warned you!
+  """
+  def __init__(self, items):
+    self._item_names = [item[0] for item in items]
+    self._item_costs = [item[1] for item in items]
+  # "Getter"
+  def items(self):
+    # list(zip(a, b)) returns a list of tuples combining a and b
+    return list(zip(self._item_names, self._item_costs))
+  # "Setter"
+  def set_items(self, items):
+    self._item_names = [item[0] for item in items]
+    self._item_costs = [item[1] for item in items]
+  def total_cost_pretax(self):
+    total = 0
+    for name, cost in self._items:
+      total += cost
+    return total
+  # Notice that we can call another method with this one
+  def total_cost_with_tax(self, tax_rate):
+    return round(self.total_cost_pretax() * (1 + tax_rate), 2)
+my_lunch = [("Ham Sandwich", 9), ("Coke", 2)]
+new_bill = Bill(my_lunch)
+print(new_bill.items())  # this still works, but _items would have broken
+class Circle:
+  def __init__(self, radius):
+    if radius < 0:
+      raise ValueError("Can't have negative circle radius")
+    self.radius=radius
+Circle(-1)
+class Circle2:
+  def __init__(self,radius=2):
+    self.radius = radius
+Circle2().radius
+class Student:
+  def __init__(self, age, major, year):
+    self.age = age
+    self.major = major
+    self.year = year
+  def get_older(self, amount):
+    self.age += amount
+bob = Student(20,"Biology","Sophomore")
+bob.get_older(2)
+print(bob.age)
+car1 = Car(2010, "Honda", "Fit", "blue")
+car2 = car1
+car2.color = "black"
+car1.print_facts()  # It's black now
+car2.print_facts()
+import copy
+car2 = copy.copy(car1)
+car2.color = "white"
+car1.print_facts()
+car2.print_facts()
+from google.colab import files
+uploaded = files.upload() # import books.csv
+import pandas as pd
+df = pd.read_csv('books.csv', index_col = 'title')
+df.head()
+class Book:
+    def __init__(self, title, author, average_rating):
+        self.title = title
+        self.author = author
+        self.average_rating = average_rating
+        # Could add more fields from the dataset if desired
+class Publisher:
+    def __init__(self, df, publisher_name):
+        self.name = publisher_name
+        self.books = []
+        for row in df.itertuples():
+            if row.publisher == publisher_name:
+                self.books.append(Book(row.Index, row.authors, row.average_rating))
+    def average_rating(self):
+        total = 0
+        for book in self.books:
+            total += book.average_rating
+        return total/len(self.books)
+scholastic = Publisher(df,'Scholastic Inc.')
+scholastic.average_rating()
+# Lecture19MoreOO.py
+class Client:  # both Faculty and Students
+  def __init__(self, birthyear, uid):
+    self.birthyear = birthyear
+    self.uid = uid
+  def get_uid(self):
+    return self.uid
+  def get_birthyear(self):
+    return self.birthyear
+class Student(Client):  # inherit from Client
+  """A Client that additionally records a graduation year."""
+  def __init__(self, birthyear, uid, gradyear):
+    # This version sets birthyear and uid itself instead of calling
+    # Client's __init__ through super()
+    self.birthyear = birthyear
+    self.uid = uid
+    self.gradyear = gradyear
+  def get_gradyear(self):
+    return self.gradyear
+class Faculty(Client):
+  # Adds nothing of its own; everything is inherited from Client
+  pass     # Nothing else we want to do for Faculty
+alice = Student(2003, 123456789, 2024)
+print(alice.get_birthyear()) # Inherited from Client
+print(alice.get_uid())       # Inherited from Client
+print(alice.get_gradyear())  # Specific to Student
+person1 = Student(2000,123456,2025)
+if not isinstance(person1, Faculty):
+    print("Hey, this person doesn't have permission to do this!")
+else:
+    print("Welcome, Faculty number " + str(person1.uid) + "!")
+student1 = Student(2000,123456,2025)
+print(isinstance(student1,Student))
+print(isinstance(student1,Client))
+print(isinstance(student1,object)) # Every class inherits from object
+class Student(Client):  # inherit from Client
+  def __init__(self, birthyear, uid, gradyear):
+    super().__init__(birthyear, uid)
+    self.gradyear = gradyear
+  def get_gradyear():
+    return self.gradyear
+bob = Student(2002,987654321,2022)
+print(bob.get_uid()) # inherited from Client
+class Trip:
+  def __init__(self,cost,start_date,end_date):
+    self.cost = cost
+    self.start_date = start_date
+    self.end_date = end_date
+    self.reimbursed = False
+  def cost(self):
+    return self.cost
+  def reimburse(self):
+    self.reimbursed = True
+  def dates(self):
+    return self.startDate, self.endDate
+class EquipmentOrder:
+  """An equipment purchase with a cost, a domestic_seller value, and a reimbursed flag."""
+  def __init__(self,cost,domestic_seller):
+    self.cost = cost
+    self.reimbursed = False
+    self.domestic_seller = domestic_seller
+  # NOTE(review): the instance attributes cost and domestic_seller set in
+  # __init__ have the same names as the methods below, so on an instance the
+  # attribute wins and order.cost() / order.domestic_seller() cannot be called.
+  def cost(self):
+    return self.cost
+  def reimburse(self):
+    self.reimbursed = True
+  def domestic_seller(self):
+    return self.domestic_seller
+class Expense:
+  """An expense with a cost and a reimbursed flag."""
+  def __init__(self,cost):
+    self.cost = cost
+    self.reimbursed = False
+  # NOTE(review): the instance attribute cost set in __init__ has the same
+  # name as this method, so on an instance expense.cost is the stored value
+  # and expense.cost() cannot be called.
+  def cost(self):
+    return self.cost
+  def reimburse(self):
+    self.reimbursed = True
+class Trip(Expense):
+  """An Expense that also has a start date and an end date."""
+  def __init__(self,cost,start_date,end_date):
+    # Expense's constructor stores the cost and the reimbursed flag
+    super().__init__(cost)
+    self.start_date = start_date
+    self.end_date = end_date
+  # inherit cost, reimburse
+  def dates(self):
+    # Returns both dates together as a (start, end) tuple
+    return self.start_date, self.end_date
+class EquipmentOrder(Expense):
+  """An Expense that also records a domestic_seller value."""
+  def __init__(self,cost,domestic_seller):
+    # Expense's constructor stores the cost and the reimbursed flag
+    super().__init__(cost)
+    self.domestic_seller = domestic_seller
+  # inherit cost, reimburse
+  # NOTE(review): the instance attribute domestic_seller set in __init__ has
+  # the same name as this method, so on an instance the attribute wins and
+  # order.domestic_seller() cannot be called.
+  def domestic_seller(self):
+    return self.domestic_seller
+class Employee:
+    def __init__(self, name, salary, title, years_of_service):
+        self.name = name
+        self.salary = salary
+        self.title = title
+        self.years_of_service = years_of_service
+    def give_raise(self, raise_amount):
+        self.salary += raise_amount
+    def change_title(self, new_title):
+        self.title = new_title
+    def update_years_of_service(self, increase):
+        self.years_of_service += increase
+class Contractor:
+    def __init__(self, name, salary, contract_duration):
+        self.name = name
+        self.salary = salary
+        self.contract_duration = contract_duration
+    def give_raise(self, raise_amount):
+        self.salary += raise_amount
+alice = Employee("Alice", 90000, "Manager", 7)
+alice.give_raise(10000)
+print(alice.salary)
+bob = Contractor("Bob", 80000, 2)
+bob.give_raise(10000)
+print(bob.salary)
+class Worker:
+    def __init__(self, name, salary):
+        self.name = name
+        self.salary = salary
+    def give_raise(self, raise_amount):
+        self.salary += raise_amount
+class Employee(Worker):
+    def __init__(self, name, salary, title, years_of_service):
+        super().__init__(name, salary)
+        self.title = title
+        self.years_of_service = years_of_service
+    def change_title(self, new_title):
+        self.title = new_title
+    def update_years_of_service(self, increase):
+        self.years_of_service += increase
+class Contractor(Worker):
+    def __init__(self, name, salary, contract_duration):
+        super().__init__(name, salary)
+        self.contract_duration = contract_duration
+alice = Employee("Alice", 90000, "Manager", 7)
+alice.give_raise(10000)
+print(alice.salary)
+bob = Contractor("Bob", 80000, 2)
+bob.give_raise(10000)
+print(bob.salary)
+class Gradyear:
+  # Holds a single value, year.  This version defines no __str__ of its own.
+  def __init__(self, year):
+    self.year = year
+year = Gradyear(2024)
+print(year)
+class Gradyear:
+  def __init__(self, year):
+    self.year = year
+  def __str__(self):    # Our own implementation
+    return str(self.year)
+gradyear = Gradyear(2024)
+print(gradyear)
+gy1 = Gradyear(2024)
+gy2 = Gradyear(2024)
+print(gy1 == gy2)
+myset = set()
+myset.add(gy1)
+myset.add(gy2)
+len(myset)
+class Gradyear:
+  def __init__(self, year):
+    self.year = year
+  def __str__(self):    # Our own implementation
+    return str(self.year)
+  def __eq__(self, other):
+    return self.year == other.year
+  def __hash__(self):
+    return self.year # Just store by number itself
+gy1 = Gradyear(2024)
+gy2 = Gradyear(2024)
+print(gy1 == gy2)
+myset = set()
+myset.add(gy1)
+myset.add(gy2)
+len(myset)
+# Lecture20Recursion.py
+def bad_recursion():
+  # Deliberately broken example: there is no base case, so every call makes
+  # another call and the recursion never finishes on its own.
+  print("Bad!")
+  bad_recursion()
+bad_recursion()
+def factorial(n):
+  # Omitting checks to make sure we're a natural number, etc
+  if n == 1:
+    return 1
+  return n * factorial(n-1)
+print (factorial(4))
+def factorial(n):
+  # Omitting checks to make sure we're a natural number, etc
+  print(f'Evaluating {n}!')
+  if n == 1:
+    print('Returning 1')
+    return 1
+  result = n * factorial(n-1)
+  print(f'Returning {result}')
+  return result
+print (factorial(4))
+def sum_m_to_n(m, n):
+    if n == m:
+        return m
+    result = n + sum_m_to_n(m, n-1)
+    return result
+sum_m_to_n(3, 7) # 3 + 4 + 5 + 6 + 7 = 25
+def sum_m_to_n(m, n):
+    print(f'Evaluating sum from {m} to {n}')
+    if n == m:
+        print(f'Returning {m}')
+        return m
+    result = n + sum_m_to_n(m, n-1)
+    print(f'Returning {result}')
+    return result
+sum_m_to_n(3, 7) # 3 + 4 + 5 + 6 + 7 = 25
+def mypow(a, p):
+    if p == 0:
+        return 1
+    result = a * mypow(a, p-1)
+    return result
+mypow(2,8)
+def mypow(a, p):
+    print(f'Evaluating {a}^{p}')
+    if p == 0:
+        print('Returning 1')
+        return 1
+    result = a * mypow(a, p-1)
+    print(f'Returning {result}')
+    return result
+mypow(2,8)
+def fib(n):
+    if (n == 0):
+        return 0
+    if (n == 1):
+        return 1
+    return fib(n-1) + fib(n-2)
+for i in range(10):
+    print(fib(i))
+def r_perm(r, n):
+    if n == r+1:
+        return n
+    return n * r_perm(r,n-1)
+r_perm(5,7)
+def iter_factorial(n):
+  running_fact = 1
+  for i in range(1,n+1):
+    running_fact *= i
+  return running_fact
+print(iter_factorial(4))
+import numpy as np
+def iter_fib(n):
+    if n == 0 or n == 1:
+        return n
+    fibs = np.zeros(n+1)
+    fibs[0] = 0
+    fibs[1] = 1
+    for i in range(2,n+1):
+        fibs[i] = fibs[i-1] + fibs[i-2]
+    return int(fibs[n])
+for i in range(10):
+    print(iter_fib(i))
+def power_set(setstring):
+    if len(setstring) == 0:
+        return [""]
+    subset_list = []
+    # Recursive call gets all the subsets that don't involve the first character
+    smaller_power_set = power_set(setstring[1:])
+    # The starting character is either in the subset...
+    for substring in smaller_power_set:
+        subset_list.append(setstring[0] + substring)
+    # ...or not.
+    for substring in smaller_power_set:
+        subset_list.append(substring)
+    return subset_list
+power_set("abcd")
+def recursive_sum(lst):
+    if not lst:  # empty list
+        return 0
+    return lst[0] + recursive_sum(lst[1:])
+recursive_sum([1,2,3])
+def recursive_filter(min_val, lst):
+    if not lst:
+        return []
+    if lst[0] >= min_val:
+        return [lst[0]] + recursive_filter(min_val, lst[1:])
+    else:
+        return recursive_filter(min_val, lst[1:])
+recursive_filter(3, [1, 2, 3, 4, 5])
+def recursive_index(item, lst, index):  # index tracks where we are in the list
+    if not lst:
+        return None   # not found
+    if lst[0] == item:
+        return index
+    return recursive_index(item,lst[1:],index+1)
+recursive_index(5, [0, 1, 2, 5], 0)
+def recursive_skiplist(lst):
+    if len(lst) == 0:
+        return []
+    if len(lst) == 1:
+        return lst
+    return [lst[0]] + recursive_skiplist(lst[2:])
+recursive_skiplist([5,3,7,2,9])
+# Lecture21DataStructures.py
+class ll_node:
+  def __init__(self, num):
+    self.number = num
+    self.next = None
+  def append(self, num):
+    if self.next == None:     # End of the list - add the node
+      self.next = ll_node(num)
+    else:
+      self.next.append(num) # Recursively append to rest of list
+  def contains(self, othernum):
+    if self.number == othernum:  # We found it
+      return True
+    elif self.next == None:  # We reached the end, didn't find it
+      return False
+    # Not here, there's more list - so, keep looking (recursively)
+    return self.next.contains(othernum)
+  def __str__(self):
+    if self.next == None:  # Last number
+        return str(self.number)
+    # Print this and print the rest (more recursion)
+    return str(self.number) + ' ' + str(self.next)
+mylist = ll_node(6)
+mylist.append(1)
+mylist.append(7)
+print(mylist)
+print('Contains 7: ' + str(mylist.contains(7)))
+print('Contains 5: ' + str(mylist.contains(5)))
+import numpy as np
+class dynamic_array:  # Showing how Python lists work
+  def __init__(self, initial_size):
+    self.memory = np.zeros(initial_size)
+    self.occupied = 0
+    self.size = initial_size
+  def __str__(self):
+    return str(self.memory)
+  def append(self, val):
+    if self.occupied == self.size:
+      print('Resizing...')
+      new_memory = np.zeros(self.size*2)
+      # A "hiccup" in running time as everything's copied
+      for i in range(len(self.memory)):
+        new_memory[i] = self.memory[i]
+      self.memory = new_memory
+      self.size = self.size*2
+    print('Adding ' + str(val))
+    self.memory[self.occupied] = val
+    self.occupied += 1
+my_array = dynamic_array(2)
+print(my_array)
+my_array.append(1)
+my_array.append(1)
+print(my_array)
+my_array.append(1)
+print(my_array)
+my_array.append(1)
+print(my_array)
+class FolderTree:
+  # binary left and right are its fields
+  def __init__(self, val):
+    self.left = None
+    self.right = None
+    self.val = val
+  def addLeft(self, node):
+    self.left = node
+  def addRight(self, node):
+    self.right = node
+  def find(self, v):
+    if self.val == v:
+      return True
+    # "if self.left" is checking that self.left exists -
+    # else error when we run self.left.find()
+    if self.left and self.left.find(v):
+      return True
+    if self.right and self.right.find(v):
+      return True
+    return False
+# Build the tree from the top down: root, then its two children, then the leaves
+root = FolderTree("root")
+leftparent = FolderTree("apps")
+rightparent = FolderTree("lecs")
+root.addLeft(leftparent)
+root.addRight(rightparent)
+# The two leaves under "apps"
+leftleftchild = FolderTree("wow.exe")
+leftrightchild = FolderTree("xls.exe")
+leftparent.addLeft(leftleftchild)
+leftparent.addRight(leftrightchild)
+# The two leaves under "lecs"
+rightleftchild = FolderTree("lec12.pdf")
+rightrightchild = FolderTree("lec14.pdf")
+rightparent.addLeft(rightleftchild)
+rightparent.addRight(rightrightchild)
+print(root.find("wow.exe"))  # A value that is in the tree
+print(root.find("lec13.exe"))  # A value that is not in the tree
+def count_nodes(tree):
+    """Return the number of nodes in tree, where None means an empty tree."""
+    # "is None" asks whether tree is the None object itself, so it works
+    # even if the node class defines its own ==
+    if tree is None:
+        return 0
+    # This node, plus every node below it on each side
+    return 1 + count_nodes(tree.left) + count_nodes(tree.right)
+count_nodes(root)  # Value is not printed or stored here (presumably shown as notebook cell output)
+def calc_depth(tree):
+    """Return the number of steps from tree down to its deepest leaf.
+
+    An empty tree (None) and a tree that is a single leaf both give 0.
+    """
+    # Nothing below an empty tree or a leaf, so there are no steps to count
+    if tree is None or (tree.left is None and tree.right is None):
+        return 0  # Leaf has depth 0 in its subtree
+    left_depth = calc_depth(tree.left)
+    right_depth = calc_depth(tree.right)
+    # One step down to the children, then the deeper of the two sides
+    return 1 + max(left_depth, right_depth)
+calc_depth(root)  # Value is not printed or stored here (presumably shown as notebook cell output)
+class BinarySearchTree:
+  """A binary tree node whose find() searches only one side at each step.
+
+  find() goes left when the value it wants is smaller than val and right
+  otherwise, so it only works if the tree was built in that order.
+  addLeft and addRight do not check the order themselves.
+  """
+  def __init__(self, val):
+    self.val = val
+    self.left = None
+    self.right = None
+  def addLeft(self, node):
+    self.left = node
+  def addRight(self, node):
+    self.right = node
+  def find(self, v):
+    """Return True if v is in the tree, printing each direction taken."""
+    if self.val == v:
+      return True
+    if v < self.val:
+      # Smaller values can only be on the left
+      if not self.left:
+        return False
+      print("Going Left")
+      return self.left.find(v)
+    # Otherwise v can only be on the right
+    if not self.right:
+      return False
+    print("Going Right")
+    return self.right.find(v)
+# Build from the bottom up this time: leaves, then their parents, then the root
+leftleftchild = BinarySearchTree("a")
+leftrightchild = BinarySearchTree("h")
+leftparent = BinarySearchTree("f")
+leftparent.addLeft(leftleftchild)
+leftparent.addRight(leftrightchild)
+rightleftchild = BinarySearchTree("o")
+rightrightchild = BinarySearchTree("u")
+rightparent = BinarySearchTree("q")
+rightparent.addLeft(rightleftchild)
+rightparent.addRight(rightrightchild)
+root = BinarySearchTree("m")
+root.addLeft(leftparent)
+root.addRight(rightparent)
+print(root.find("h"))  # "h" is in the tree: left of "m", then right of "f"
+print(root.find("d"))  # "d" is not: the search goes left twice and runs out
+class infect_tree:
+    """One node of an infection tree.
+
+    name is a string, infects is a list of infect_tree's infected.
+    """
+    def __init__(self, name, infects):
+        # Store both arguments unchanged (the list is not copied)
+        self.name, self.infects = name, infects
+# Each person has to exist before the person whose list they go in,
+# so every group below ends with the one who infected the rest
+eric = infect_tree('eric', [])
+bob = infect_tree('bob', [eric])
+che = infect_tree('che', [])
+jake = infect_tree('jake', [])
+idris = infect_tree('idris', [jake])
+fifi = infect_tree('fifi', [])
+ged = infect_tree('ged', [])
+hao = infect_tree('hao', [])
+daphne = infect_tree('daphne', [fifi, ged, hao, idris])
+# alice is the top of the whole tree
+alice = infect_tree('alice', [bob, che, daphne])
+def find_most_infections(my_tree):
+    """Return (name, count) for the node with the longest infects list.
+
+    Looks at my_tree and every node below it.  On a tie the node found
+    first is kept, because only a strictly larger count replaces it.
+    """
+    # Start by assuming the node at the top is the best one
+    top_name = my_tree.name
+    top_count = len(my_tree.infects)
+    for person in my_tree.infects:
+        result = find_most_infections(person) # Recursion...
+        # result[0] is the best name in that subtree, result[1] its count
+        if result[1] > top_count:
+            top_name = result[0]
+            top_count = result[1]
+    return top_name, top_count
+find_most_infections(alice)  # Value is not printed or stored here (presumably shown as notebook cell output)
+def find_all_descendants(my_tree):
+    """Return a list of the names in my_tree, starting with my_tree's own name."""
+    names = [my_tree.name]
+    for person in my_tree.infects:
+        # More recursion: add every name from this person's part of the tree
+        names.extend(find_all_descendants(person))
+    return names
+find_all_descendants(daphne)  # Value is not printed or stored here (presumably shown as notebook cell output)
+# Lecture22ScikitLearn.py
+from sklearn.datasets import load_digits
+import matplotlib.pyplot as plt
+digits = load_digits()  # A dataset that comes with scikit-learn
+print(digits.data.shape) # Examples x 64 pixels
+import matplotlib.pyplot as plt  # Same import as two lines up; repeating it is harmless
+plt.gray()  # Draw images in grayscale
+plt.matshow(digits.images[0]) # Notice images[0] is 2D
+from warnings import simplefilter
+simplefilter(action='ignore', category=FutureWarning)  # Hide FutureWarning messages
+from sklearn.neighbors import KNeighborsClassifier
+nbrs = KNeighborsClassifier(n_neighbors=3).fit(digits.data, digits.target)  # Classify using the 3 nearest training examples
+nbrs.score(digits.data, digits.target) # Find accuracy on the training dataset
+from sklearn.model_selection import train_test_split
+data_train, data_test, label_train, label_test = train_test_split(digits.data, digits.target, test_size=0.2)  # Hold out 20% for testing; no seed is set, so the split differs each run
+nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)  # Fit again, this time on the training part only
+nbrs.score(data_test,label_test)  # Accuracy on the held-out test part
+print(nbrs.predict(data_test[0:3]))  # Predicted labels for the first three test examples
+def reshape_and_show(num, data_test):
+    """Draw row number num of data_test as an 8 by 8 image."""
+    # reshape(8,8) turns the flat row of 64 values back into a grid
+    plt.matshow(data_test[num].reshape(8,8))
+reshape_and_show(0,data_test)  # Draw the same three test examples that were predicted above
+reshape_and_show(1,data_test)
+reshape_and_show(2,data_test)
+from sklearn.datasets import fetch_lfw_people
+faces = fetch_lfw_people(min_faces_per_person = 100)  # Only keep people who have at least 100 pictures
+plt.imshow(faces.images[5], cmap="gray")  # Look at one of the face images
+data_train, data_test, label_train, label_test = train_test_split(faces.data, faces.target, test_size=0.2)  # Reuses the digit variable names for the faces; unseeded, so the split differs each run
+nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)
+nbrs.score(data_test,label_test)  # Accuracy on the held-out faces
+import random
+random.seed(110)  # Set seed - comment this out to get different rolls
+print(random.randint(1,8))  # Normally produces random integer 1-8
+print(random.randint(1,8))  # With the seed set, both rolls are the same on every run
+data_train, data_test, label_train, label_test = train_test_split(faces.data,
+                                                                  faces.target, test_size=0.2,
+                                                                  random_state=110) # Set the seed
+nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)
+nbrs.score(data_test,label_test)  # Same score on every run now that the split is seeded
+from sklearn.model_selection import cross_val_score
+cross_val_score(nbrs, data_train, label_train)  # One score per cross-validation fold
+import numpy as np
+for i in range(1,10):  # Try 1 through 9 neighbors
+  nbrs = KNeighborsClassifier(n_neighbors=i)  # Not fit here; cross_val_score does the fitting
+  print(np.mean(cross_val_score(nbrs, data_train, label_train)))  # Average score over the folds
+# Lecture23DecisionTrees.py
+import math
+yes_branch_entropy = 0  # Entropy of the "yes" branch
+no_branch_entropy = -0.2 * math.log(0.2,2) - 0.8 * math.log(0.8, 2)  # Entropy of a 20%/80% mix, using log base 2
+pr_yes = 5/2005  # 5 out of 2005 (presumably the examples that take the yes branch)
+pr_no = 2000/2005  # The other 2000 out of 2005
+print(pr_yes * yes_branch_entropy + pr_no * no_branch_entropy)  # Average of the two entropies, weighted by branch probability
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+import numpy as np
+iris = load_iris()  # Another dataset that comes with scikit-learn
+iris.feature_names  # Names of the feature columns
+iris.target_names  # Names of the classes
+iris.data[0]  # Feature values of the first example
+features_train, features_test, labels_train, labels_test = \
+train_test_split(iris.data, iris.target, test_size=0.1, random_state=110)  # Hold out 10% for testing, with a fixed seed
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.model_selection import cross_val_score
+dtree = DecisionTreeClassifier(criterion="entropy", random_state=110)  # Pick splits using entropy
+dtree.fit(features_train, labels_train)
+dtree.score(features_test, labels_test) # Gives accuracy
+import matplotlib.pyplot as plt
+from sklearn import tree
+plt.figure(figsize=(14,10))  # Make the figure large before drawing the tree
+tree.plot_tree(dtree, feature_names = iris.feature_names, class_names = iris.target_names)  # Draw the fitted tree, labeled with the feature and class names
+# Lecture24RandomForestsOnly.py
+from sklearn.datasets import load_iris
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+import numpy as np
+iris = load_iris()
+iris["feature_names"]  # Square brackets work here as well as iris.feature_names
+features_train, features_test, labels_train, labels_test = \
+  train_test_split(iris['data'], iris['target'],
+                   test_size=0.1,random_state=110)  # Hold out 10% for testing, with a fixed seed
+irisforest = RandomForestClassifier(n_estimators=200,criterion="entropy",random_state=110)  # A forest of 200 trees
+irisforest.fit(features_train, labels_train)
+irisforest.score(features_test, labels_test)  # Accuracy on the held-out test part
+irisforest.feature_importances_  # One importance number per feature
+# Lecture25Regression.py
+import numpy as np
+x = np.linspace(1984, 2016, 33)  # 33 evenly spaced values: 1984, 1985, ..., 2016
+y = [48.0, 47.3, 47.2, 47.4, 47.2, 46.7,
+     49.7, 49.6, 46.4, 47.3, 47.7, 47.8, 47.3, 47.4, 50.4, 49.8,
+     47.5, 49.1, 49.4, 47.1, 47.6, 48.4, 50.1, 48.3, 48.6, 47.8,
+     50.4, 49.7, 51.4, 48.8, 47.7, 48.5, 50.3]  # 33 temperatures, one for each year in x
+import matplotlib.pyplot as plt
+plt.plot(x,y,'o')  # 'o' draws a dot per point instead of a line
+import sklearn.linear_model as lm
+from sklearn.linear_model import LinearRegression
+linear_model = LinearRegression()
+x = x.reshape(-1,1)  # Make x a single column, one row per year, for fit()
+linear_model.fit(x,y)
+y_hat = linear_model.predict(x)  # The fitted line's value at each year
+plt.plot(x,y,'o')  # The data again as dots
+plt.plot(x,y_hat,'r')  # The fitted line in red
+print(f'The temperature is rising {linear_model.coef_[0]:.4f} degrees F per year')  # coef_[0] is the slope
+print(f'{linear_model.intercept_:.2f}')  # The line's value where x is 0
+linear_model.score(x,y)  # How well the line fits the data it was trained on
+methane = np.array([12.81, 25.15, 38.06, 49.47, 60.24, 71.32,
+     80.08, 94.14, 96.49, 100.32, 107.54, 111.50, 113.97, 120.26, 132.39, 134.82,
+     133.30, 132.60, 135.91, 140.65, 135.76, 136.14, 138.11, 145.90, 152.41, 157.13,
+     162.33, 167.15, 172.17, 177.86, 190.62, 200.65, 207.73])  # 33 values, the same length as y
+mass_co = [84, 82.7, 84.9, 81.7, 81.9, 79.2, 79.9, 85.9, 84.3, 81.9,
+           82.9, 82.8,83.7, 85, 83.6, 85, 77.1, 80.4, 77.2, 70.6,
+           72.0, 68.1, 61.9, 65.7, 63.8, 65.6, 63.9]  # Only 27 values, so the other lists are cut down to match
+y_from_90 = y[6:]  # From the last example, these are the temperatures
+methane_from_90 = methane[6:]  # Drop the first 6 values (1984-1989), leaving 27
+x = np.transpose(np.array([mass_co, methane_from_90]))  # Two columns (mass_co, methane), one row per year
+x  # Value is not printed or stored here (presumably shown as notebook cell output)
+temp_model = LinearRegression()
+temp_model.fit(x,y_from_90)
+print(temp_model.coef_)  # One coefficient per column of x
+print(temp_model.intercept_)
+from sklearn.tree import DecisionTreeRegressor
+import numpy as np
+import matplotlib.pyplot as plt
+model = DecisionTreeRegressor() # no pruning of any kind, so expect overfitting
+x = np.linspace(1984, 2016, 33)  # The years again: 1984, 1985, ..., 2016
+x = x.reshape(-1,1)  # Single column, one row per year
+y = [48.0, 47.3, 47.2, 47.4, 47.2, 46.7,
+     49.7, 49.6, 46.4, 47.3, 47.7, 47.8, 47.3, 47.4, 50.4, 49.8,
+     47.5, 49.1, 49.4, 47.1, 47.6, 48.4, 50.1, 48.3, 48.6, 47.8,
+     50.4, 49.7, 51.4, 48.8, 47.7, 48.5, 50.3]  # Same 33 temperatures as in the first example
+xtrain = x[:30]  # Train on the first 30 years; the last 3 are held out
+ytrain = y[:30]
+model.fit(xtrain,ytrain)
+yhat = model.predict(x)  # Predict all 33 years, including the 3 held out
+plt.plot(x,y,'o')
+plt.plot(x[:30],yhat[:30])  # Predictions for the years the model trained on
+plt.plot(x[29:],yhat[29:],'r') # Plot line to test predictions in red
+model = DecisionTreeRegressor(max_depth = 3) # maybe overdoing it on the pruning
+x = np.linspace(1984, 2016, 33)  # Back to a plain 1D list of years
+prev_value_features = [0] + y.copy()[:-1] # shift y values so we see the previous one; discard last
+combined_features = np.array([x, prev_value_features]).transpose()  # Two columns: the year and the previous year's value
+print(combined_features)
+xtrain = combined_features[:30,:]  # First 30 rows, all columns
+model.fit(xtrain,ytrain)  # ytrain is still y[:30] from the previous example
+yhat = model.predict(combined_features)
+plt.plot(x,y,'o')
+plt.plot(x[:30],yhat[:30])  # Predictions for the years the model trained on
+plt.plot(x[29:],yhat[29:],'r')  # Predictions for the held-out years, in red
+from sklearn.ensemble import RandomForestRegressor
+model = RandomForestRegressor()  # No random_state given, so results can differ between runs
+model.fit(xtrain,ytrain) # xtrain has the matrix we made in the previous code box
+yhat = model.predict(combined_features)
+plt.plot(x,y,'o')
+plt.plot(x[:30],yhat[:30])  # Predictions for the years the model trained on
+plt.plot(x[29:],yhat[29:],'r')  # Predictions for the held-out years, in red
+from sklearn.neighbors import KNeighborsRegressor
+model = KNeighborsRegressor(n_neighbors=3)  # Predict from the 3 nearest training rows
+model.fit(xtrain,ytrain) # xtrain has the matrix we made in the previous code box
+yhat = model.predict(combined_features)
+plt.plot(x,y,'o')
+plt.plot(x[:30],yhat[:30])  # Predictions for the years the model trained on
+plt.plot(x[29:],yhat[29:],'r')  # Predictions for the held-out years, in red
+# Lecture26ModernNLPandML.py
+import pandas as pd
+SST2_LOC = 'https://github.com/clairett/pytorch-sentiment-classification/raw/master/data/SST2/train.tsv'
+df = pd.read_csv(SST2_LOC, delimiter='\t', header=None)  # Tab-separated, no header row, so the columns are named 0, 1, ...
+df  # Value is not printed or stored here (presumably shown as notebook cell output)
+import nltk
+from nltk.tokenize import word_tokenize
+nltk.download('punkt') # Name means 'period' in German; from Kiss and Strunk 2006
+word_tokenize("I won't sell my cat for even $1,000,000,000.")  # Try the tokenizer on one sentence
+def wordset(raw_text):
+  """Return the set of tokens in raw_text after making it lowercase."""
+  lowered = raw_text.lower()
+  # Turning the token list into a set drops repeated words
+  return set(word_tokenize(lowered))
+def all_words_set(df_column):
+  """Return the set of all words in df_column and a dict of each row's words.
+
+  The dict goes from each row's text to the set of words in that text.
+  """
+  set_of_all = set()
+  dict_of_all = {}
+  for row in df_column:
+    textset = wordset(row)
+    # update() adds to the set we already have; union() would build a
+    # whole new copy of the growing set for every row
+    set_of_all.update(textset)
+    dict_of_all[row] = textset
+  return set_of_all, dict_of_all
+def one_hot_columns(df_column):
+  """Return a DataFrame with one column per word and one row per text.
+
+  An entry is 1 if that row's text contains that column's word, else 0.
+  """
+  all_words, all_tokenizations = all_words_set(df_column)
+  word_dict = {}
+  for word in all_words:
+    word_present_list = []
+    # Loop over the texts themselves.  df_column[line_num] would look rows
+    # up by index label, which fails if the labels are not 0, 1, 2, ...
+    for text in df_column:
+      if word in all_tokenizations[text]:
+        word_present_list.append(1)
+      else:
+        word_present_list.append(0)
+    word_dict[word] = word_present_list
+  # We can create a dataframe from a dictionary of column header
+  # to list of column values
+  return pd.DataFrame.from_dict(word_dict)
+one_hot_cols = one_hot_columns(df.iloc[:,0])  # Column 0 holds the text
+one_hot_cols  # Value is not printed or stored here (presumably shown as notebook cell output)
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+labels = df[1]  # Column 1 is used as the label to predict
+features = one_hot_cols
+X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state=42)  # No test_size given, so the library default is used
+clf = RandomForestClassifier(n_estimators=200, random_state=42)  # A forest of 200 trees
+clf.fit(X_train, y_train)
+clf.score(X_test, y_test)  # Accuracy on the held-out test part
+one_hot_cols.sum()  # Column totals: how many texts contain each word
+import gensim.downloader as api
+wv = api.load('word2vec-google-news-300')  # Load pretrained word vectors by name
+wv['king']  # The vector for one word
+print(wv.most_similar('king')) # Prints words and cosines of angles with 'king'
+import numpy as np
+def find_cosine(vec1, vec2):
+  """Return the cosine of the angle between vec1 and vec2."""
+  length1 = np.linalg.norm(vec1)
+  length2 = np.linalg.norm(vec2)
+  # Dividing each vector by its own length scales it to unit length, and
+  # the dot product of unit vectors gives the cosine of their angle
+  return np.dot(vec1/length1, vec2/length2)
+print(find_cosine(wv['king'], wv['faucet']))  # Our own cosine calculation for two words
+wv.similarity('king', 'faucet')  # The library's similarity for the same two words, to compare
+def find_avg_vector(txt, embedding):
+  """Return a pd.Series with the average embedding vector of the words in txt.
+
+  Words that are not in embedding are skipped.  If none of the words are
+  in embedding, the result is a vector of all zeros.
+  """
+  words = word_tokenize(txt)
+  vec_sum = None
+  count = 0
+  for word in words:
+    if word in embedding:
+      count += 1
+      if vec_sum is not None:
+        vec_sum += embedding[word]
+      else:
+        # The embeddings are read-only unless you copy them
+        vec_sum = embedding[word].copy()
+  if vec_sum is None:
+    # Use the embedding's own vector length if it has one (gensim calls it
+    # vector_size); otherwise 300, as in 'word2vec-google-news-300'
+    num_dims = getattr(embedding, 'vector_size', 300)
+    return pd.Series(np.zeros((num_dims,)))  # Treat no word found in embedding as zero vector
+  return pd.Series(vec_sum/count)
+find_avg_vector('Long live the king and queen!', wv)  # Try it on one sentence
+df_embeddings = df[0].apply(lambda txt: find_avg_vector(txt, wv))  # Each returned Series becomes one row of a new DataFrame
+df_embeddings.rename(columns=lambda x: 'feature'+str(x), inplace=True)  # Rename columns 0, 1, ... to feature0, feature1, ...
+df_augmented = pd.concat([df, df_embeddings], axis=1)  # axis=1 puts the new columns beside the original ones
+df_augmented  # Value is not printed or stored here (presumably shown as notebook cell output)
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+labels = df_augmented[1]  # Column 1 is used as the label, as before
+features = df_augmented.iloc[:,2:]  # Skip the first two columns (assumes df has only the text and label columns)
+X_train, X_test, y_train, y_test = train_test_split(features, labels, random_state=42)  # Same seed as the one-hot version
+clf = RandomForestClassifier(n_estimators=200, random_state=42)
+clf.fit(X_train, y_train)
+clf.score(X_test, y_test)  # Accuracy using the averaged word vectors as features