# Lecture2HelloWorldAndExpressions.py print('Hello, world!') print('Hello 1') print('Hello 2') print('Hello 3') print('Hello, world!') print(Hello, world!) print(Hello, world!) # Intentionally creates an error! print(1) # Technically an expression print(1+2) # Two operands and an operator make an expression print(10*(10+1)) # The expression (10+1) acting as an operand print(3 + 8 / 2) # What do you predict? print(4 * 2 + 3 + 5 * 2) # And this one? print('Hello', 'world', '!') print(max(2,5,7)) print(max(2,7) + max(3,9)) # Using function calls as operands print(max(2,7) + max(3,9)) # Calc 7, calc 9, then add 1 2 3 max(2,7) None print(2) + 2 print('Hello, world!') max(2 ** 8, 3 ** 6, 5 ** 3) 1.0000000000000001 - 1 print(type(-100)) # int print(type(10.1)) # float print(type('A')) # str print(type(True)) # bool print(type('10')) # str print(type(10)) # int print(type(10.0)) # float print(type(True)) # bool 0.1 + 0.1 + 0.1 'Hello ' + 1111 'Hello ' + 'world' + '!' 'Hello ' + str(1111) 20 * 9/5 + 32 print('Temp: 68.0 F') print('Temp: ' + 20 * 9/5 + 32 + ' F') print('Temp: ' + str(20 * 9/5 + 32) + ' F') # Lecture3VariablesAndConditions.py two_to_the_eighth = 2 ** 8 print(two_to_the_eighth) two_to_the_eighth * 2 pay_per_hour = 18 pay_per_hour = 20 # Pay raise! print(pay_per_hour) counter = 0 counter = counter + 1 # It's an instruction, not an equality! print(counter) counter = counter + 1 print(counter) pay_per_hour = 20 hours = 40 total_pay = pay_per_hour * hours print(total_pay) Pay_Per_Hour = 15 # please avoid this capitalization style! print(pay_per_hour) # remembers the lowercase value silent_assignment = 0 20 = pay_per_hour print(undefined_var + 7) color = input('What is your favorite color? ') print('Yeah, ' + color + ' is pretty great!') to_square_str = input('What should I square? ') print(int(to_square_str) ** 2) city = input('What city are we in? ') print(city == 'Boston') answer = input('What is 2+2? 
') print(answer == 4) # not going to work answer == '4' # but this works int(answer) == 4 # or this float(answer) == 4 # or even this print(1 < 1) print(1 > 1) print(1 != 1) print(1 <= 1) print(1 >= 1) print('aardvark' < 'zebra') print('capitalized' == 'Capitalized') 2 + 5 > 7 - 4 # 5 > 7 would be false, but (2+5) > (7-4) is True total = 0 value_str = input('Enter a value: ') value_int = int(value_str) if value_int < 0: print('Sorry, that was a negative value.') else: total = total + value_int print(total) if condition: statement_if_true1 statement_if_true2 statement_if_true3 ... else: statement_if_false1 statement_if_false2 ... statement_regardless1 statement_regardless2 ... value = int(input('Enter an integer:')) if value < 0: print('Negative') else: print('Positive') print('Done') password = input('Enter the password: ') if password == '1234': print('Correct!') print('Your account has $1000000 in it.') else: print('Incorrect.') print('Have a nice day.') num1_str = input('Enter an integer: ') num2_str = input('Enter a different integer: ') num1_int = int(num1_str) num2_int = int(num2_str) if num1_str == num2_str: print('The numbers were supposed to be different...') print('But you entered ' + num1_str + ' twice!') else: print(num2_str + ' divided by ' + num1_str + ' is...') print(num2_int / num1_int) # Divide by zero would be error, btw print('Done...') language = input('What is your favorite language? 
') if language == 'Python': print('Mine too!') print('But there sure are a lot of languages out there....') value = int(input('Enter an integer between 0 and 100: ')) if value < 0: print('No negative numbers!') elif value > 100: print('That value is too large!') elif value == 42: print('That was the number I was thinking of!') else: print('Guess again.') value = int(input('Enter an integer between 0 and 100: ')) if value < 0: print('No negative numbers!') elif value > 100: print('That value is too large!') elif value >= 50: print('Big!') else: print('Small!') value = int(input('Enter an integer between 0 and 100: ')) if value < 0: print('No negative numbers!') else: if value > 100: print('That value is too large!') else: if value >= 50: print('Big!') else: print('Small!') age = int(input('Enter your age: ')) if age < 18: if age < 5: print('Just a toddler, then.') elif age < 12: print('Not quite a teenager, then.') else: print('Teenage years ... a difficult time!') else: print('An adult, then.') if age >= 55: print('And a senior citizen, too!') num1 = int(input('First number: ')) num2 = int(input('Second number: ')) num3 = int(input('Third number: ')) my_max = max(num1, num2, num3) my_min = min(num1, num2, num3) my_mean = (num1+num2+num3)/3 # Note importance of parens! 
# --- End of the Lecture 3 "three numbers" example -------------------------
# Report the statistics computed just above (my_min, my_max, my_mean) and
# say whether any of the three inputs was entered more than once.
# NOTE(review): line breaks and indentation in this section were
# reconstructed from a whitespace-collapsed export; confirm loop bodies
# against the original notebook.
print('Min: ' + str(my_min))
print('Max: ' + str(my_max))
print('Mean: ' + str(my_mean))
if num1 == num2:
    print(str(num1) + ' was repeated')
elif num2 == num3:
    print(str(num2) + ' was repeated')
elif num1 == num3:
    print(str(num3) + ' was repeated')
else:
    print('The numbers were unique')


# Lecture4WhileAndLists.py

# Sentinel-controlled loop: keep squaring inputs until the user types 'stop'.
string = input('Enter a number: ')
while string != 'stop':
    print(string + ' squared is ' + str(int(string) ** 2))
    string = input('Enter a number: ')
print('Done.')

# Counting loop: prints 0 through 20 inside the loop.
counter = 0
while counter < 21:
    print(counter)
    counter = counter + 1
# After the loop the counter holds the first value that failed the test (21).
print(counter)

# Counting loop that starts at 1 instead of 0.
counter = 1
print('We will now iterate three times...')
while counter < 4:
    print('Iteration ' + str(counter))
    counter = counter + 1

# Running average of user-entered integers, long-hand assignment form.
total = 0
count = 0
value_str = input('Enter a number, or "done" if done: ')
while value_str != 'done':
    count = count + 1
    value_int = int(value_str)
    total = total + value_int
    value_str = input('Enter a number, or "done" if done: ')
if count > 0:  # Skip the division when nothing was entered
    print('The average is ' + str(total/count))

# Same program again, using the += shorthand.
total = 0
count = 0
value_str = input('Enter a number, or "done" if done: ')
while value_str != 'done':
    count += 1
    value_int = int(value_str)
    total += value_int
    value_str = input('Enter a number, or "done" if done: ')
if count > 0:
    print('The average is ' + str(total/count))

# Deliberately infinite loop: the condition is always True and there is no
# break, so this only stops when the program is interrupted. When run as a
# plain script, nothing below this loop is reached.
while(True):
    input('Enter any input to get a compliment: ')
    print('That is so clever of you!')

# Lists: indexing starts at 0.
my_list = ['duck', 'duck', 'goose'] # A list with 3 items
print(my_list[0])
print(my_list[1])
print(my_list[2])

# Lists are mutable: assigning to an index replaces that item in place.
my_list = ['duck', 'duck', 'goose']
my_list[2] = 'bear'
print(my_list)

# append() modifies the list in place.
my_list = [1, 2, 3]
my_list.append(4)
print(my_list) # my_list has changed...
print(my_list.append(5)) print(my_list) shopping_list = [] item = input('Add an item to the shopping list (or "done"): ') while item.lower() != 'done': shopping_list.append(item) item = input('Add an item to the shopping list (or "done"): ') print('Okay, so that was: ') print(shopping_list) [1, 2, 3] + [4, 5, 6] print(len('Hello')) print(len([1, 2, 3])) my_items = ['eggs', 'flour', 'milk'] print(len(my_items), 'items') print(my_items[2]) print(my_items[len(my_items)-1]) planet_diameter_km = [4879, 12104, 12756, 6792, 142984, 120536, 51118, 49528, 2377] planet_diameter_km.sort() planet_diameter_km my_list1 = [3, 2, 1] my_list2 = my_list1 my_list1.sort() print(my_list1) print(my_list2) my_list1 = [3, 2, 1] my_list2 = my_list1.copy() my_list1.sort() print(my_list1) print(my_list2) honors = ['Albert', 'Berenice', 'Chen', 'Dominique'] mentioned_honors = [] nonhonors = [] student = input('Enter a name (or "done"): ') while (student != 'done'): if student in honors: print('Honors!') mentioned_honors.append(student) else: print('Not honors...') nonhonors.append(student) student = input('Enter a name (or "done"): ') print('Honors mentioned: ' + str(mentioned_honors)) print('Nonhonors mentioned: ' + str(nonhonors)) # Lecture5MorePower.py percent = input('Enter a percentage between 0 and 100:') if float(percent) >= 0 and float(percent) <= 100: if float(percent) >= 10: print('A decent return on investment....') else: print('Not a great return on investment....') else: print('That is not in the requested range!') vip = False spent = 10 if vip or spent >= 10000: print('Send this person a loyalty reward!') else: print('This person deserves nothing!') vip = False if not vip: print('Have you considered signing up to join the VIP program?') else: print('Welcome back, VIP customer!') vip = False spent = 0 if not vip or spent < 10000: # "not" applied to vip before "or" print('Please spend more') else: print('Hello, valued patron!') vip = False spent = 0 if not (vip or spent < 10000): 
# within parens evaluates to True print('Please spend more') else: print('Hello, valued patron!') my_list = [1,2,3] my_list2 = [7,8,9] if not 4 in my_list and not 4 in my_list2: print('No 4 found') my_list = [1,2,3] my_list2 = [7,8,9] if 4 not in my_list and not in my_list2: print('This will actually cause an error - not how "in" works') import math math.sqrt(2) import math as m m.sqrt(2) from math import sqrt as my_sqrt my_sqrt(2) get_ipython().system('python3 -m ensurepip --upgrade') get_ipython().system('pip install seaborn') import seaborn as sns df = sns.load_dataset("penguins") # Load a dataset about penguins sns.jointplot(data=df, x="flipper_length_mm", y="bill_length_mm", hue="species") import statistics statistics.median([1, 2, 3, 4]) import statistics statistics.median([1, 2, 3, 4]) total = 0 count = 0 value_str = input('Enter a number, or "done" if done: ') while value_str != 'done': count = count + 1 value_int = int(value_str) total = total + value_int value_str = input('Enter a number, or "done" if done: ') if count > 0: print('The average is ' + str(total/count)) total = 0 count = 0 value_str = input('Enter a non-negative integer, or "done" if done: ') while value_str != 'done': if not value_str.isdigit(): print('Non-negative integers only!') else: count = count + 1 value_int = int(value_str) total = total + value_int value_str = input('Enter a non-negative integer, or "done" if done: ') if count > 0: print('The average is ' + str(total/count)) total = 0 count = 0 value_str = input('Enter a number, or "done" if done: ') while value_str != 'done': count = count + 1 value_int = int(value_str) total = total + value_int print(value_str) if count > 0: print('The average is ' + str(total/count)) 3 = my_list total = 0 count = 0 value_str = input('Enter a number, or "done" if done: ') count = count + 1 value_int = int(value_str) total = total + value_int if count > 0: print('The average is ' + str(total/count)) # Lecture6and7Iteration.py people = ['Alice', 
'Bob', 'Che'] index = 0 while index < len(people): person = people[index] print('Hooray for ' + person + '!') index += 1 people = ['Alice', 'Bob', 'Che'] for person in people: print('Hooray for ' + person + '!') running_total = 0 numbers = [1,2,3,4,10] for n in numbers: running_total = running_total + n # Could be abbreviated running_total += n print('Sum so far: ' + str(running_total)) print('Sum: ' + str(running_total)) my_grades = [4, 3, 2, 3, 4] letter_grades = [] for g in my_grades: if g == 4: letter_grades.append('A') elif g == 3: letter_grades.append('B') elif g == 2: letter_grades.append('C') print(letter_grades) temps_f = [36, 39, 45, 56, 66, 76, 81, 80, 72, 61, 51, 41] # Jan through Dec temps_c = [] for t in temps_f: degrees_c = (t - 32)*5/9 temps_c.append(round(degrees_c, 2)) # Round to 2 decimal places temps_c my_car = ("Honda Fit", 2010, 30, 10000) print(my_car) car_type, year, mpg, price = my_car print(mpg) print(my_car[0] + ' prints successfully') # OK my_car[0] = 'bad value' # Not OK, trying to change the tuple my_movies = [("No", 4), ("Rogue One", 4.5), ("Casablanca", 5)] for moviename, stars in my_movies: # Notice the two variable names print ('I would rate ' + moviename + ' ' + str(stars) + ' stars') my_movies = [("No", 4), ("Rogue One", 4.5), ("Casablanca", 5)] best_rating = 0 # Initialize with a value that is definitely beat best_movie = "none" for movie, rating in my_movies: if rating > best_rating: best_rating = rating best_movie = movie print("Best movie: " + best_movie + "...rating = " + str(best_rating)) movies = ['Fall Guy', 'Free Guy', 'Cable Guy'] ratings = [5, 4, 3] for movie, rating in zip(movies, ratings): print("I'd rate " + movie + " a " + str(rating)) sw_movies = [('The Phantom Menace', 52), ('Attack of the Clones', 65), ('Revenge of the Sith', 80), ('Rogue One', 84), ('Solo', 70), ('Star Wars', 92), ('The Empire Strikes Back',94), ('Return of the Jedi', 82), ('The Force Awakens', 93), ('The Last Jedi', 90), ('The Rise of 
Skywalker', 51)] my_list = [] for movie, score in sw_movies: if score >= 80: my_list.append(movie) print(my_list) for i in range(5): print ("Iteration " + str(i)) for i in range(1,6): print(i) my_itinerary = ['Boston', 'Atlanta', 'LA', 'Seattle'] for idx in range(len(my_itinerary)-1): # Avoid indexing out of bounds print(my_itinerary[idx] + '-' + my_itinerary[idx+1]) names = ['Alice', 'Bob', 'Charlie', 'Dora'] for number, name in enumerate(names): print(name + ' ' + str(number)) for movie, rating in sw_movies: print('Looking at ' + movie) if movie == 'Rogue One': print('The rating of Rogue One is ' + str(rating)) break # We don't need to look at any other entries print('Done') my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2], [100.2, 99.9, 100.0, 103.1]] my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2], [100.2, 99.9, 100.0, 103.1]] my_two_stock_histories[1] my_two_stock_histories = [[40.1, 40.2, 39.9, 40.2], [100.2, 99.9, 100.0, 103.1]] my_two_stock_histories[1][2] my_stock_histories = my_two_stock_histories.copy() my_stock_histories.append([5.0, 9.0, 6.0, 7.0]) print(my_stock_histories) print('Stock 0 closing prices: ') for price in my_stock_histories[0]: print(price) print('Starting prices for all stocks:') for stock_list in my_stock_histories: print(stock_list[0]) letters = ['a', 'b', 'c','d','e','f','g','h','i','j'] print('All possible coordinates in Battleship:') for l in letters: for n in range(1,11): print(l + str(n)) bills = [[1, 2, 3], [4,5,6], [7,8,9]] my_totals = [] # empty list for l in bills: print('new list') listsum = 0 for l2 in l: # iterating over the list we got from the outer foreach print('adding ' + str(l2)) listsum += l2 my_totals.append(listsum) print('Bill sums:' + str(my_totals)) print('Possible matchups:') players = ['Alice', 'Bobby', 'Caspar', 'Dmitri', 'Eve'] for white_player in players: for black_player in players: print("White: " + white_player + "; Black player: " + black_player) print('Possible matchups:') players = ['Alice', 
'Bobby', 'Caspar', 'Dmitri', 'Eve'] for white_player in players: for black_player in players: if not white_player == black_player: print("White: " + white_player + "; Black player: " + black_player) my_multiples_of_3 = [v * 3 for v in range(5)] my_multiples_of_3 unrounded = [1.9, 5.3, 9.9] rounded = [round(i,0) for i in unrounded] rounded unrounded = [1.9, 5.3, 9.9] rounded = [] for item in unrounded: rounded.append(round(item,0)) print(rounded) temps_f = [36, 39, 45, 56, 66, 76, 81, 80, 72, 61, 51, 41] # Jan through Dec temps_c = [round((t-32)*5/9,2) for t in temps_f] temps_c times = [(2,30), (4,10), (1, 30), (0,40), (0, 20)] minutes = [t[0]*60 + t[1] for t in times] minutes # Lecture8and9Functions.py def add_an_s(string): new_string = string + 's' return new_string add_an_s('example') + '!' records = read_customer_data('input.csv') sales = 0 purchase_counts = [] s_names = [] for record in records: name, purchase_list, sale_info = parse_record(record) s_names.append(standardize_name(name)) sales = update_total_sales(sales, sale_info) update_purchase_counts(purchase_counts, purchase_list) write_to_file(s_names, purchase_counts, sales, 'output.csv') def add_two(my_number): # Adds two to the argument. 
return my_number + 2 add_two(2) def count_matches(to_match, my_list): # Counts how many times to_match appears in my_list count = 0 for m in my_list: if to_match == m: count += 1 return count print(count_matches(5, [5, 6, 7, 5])) print(count_matches("foo", ["foo","bar","baz"])) def percent_gain(start, finish): return (finish-start)/start * 100 print(percent_gain(36585.06, 33147.25)) print(percent_gain(4796.56, 3839.50)) print(percent_gain(15832.80, 10466.48)) def get_rating(movie_tuple): # More readable way to access a movie rating return movie_tuple[1] get_rating(('Portrait of a Lady on Fire', 5)) def with_tax(price, tax): return round(price * (1 + tax * .01), 2) with_tax(1,8.6) from datetime import date def greet_user(): print("Hello, user!") print("Today's date is " + str(date.today())) greet_user() def greet_user(): print("Hello, user!") print("Today's date is " + str(date.today())) return print(greet_user()) def longest_customer_name(list_of_names): # Find the longest customer name, and how long it is # (maybe so we can display the names nicely later) longest_len = 0 longest_name = "" for n in list_of_names: if len(n) > longest_len: longest_len = len(n) longest_name = n return longest_name, longest_len name, length = longest_customer_name(['Alice', 'Bob', 'Cassia']) print(name) print(length) from statistics import mean def min_mean_max(L): return min(L), mean(L), max(L) min_mean_max([1,2,3,4,5]) def count_items(lst): # Count items but warn if the list is empty if (len(lst) == 0): print('Warning: empty list passed to count_items!') return 0 print("We don't get here with an empty list") return len(lst) count_items([]) def is_prime(n): for i in range(2, n): # Look for a divisor if n % i == 0: # i divides n evenly, no remainder return False return True # didn't find a divisor print(is_prime(11)) print(is_prime(4)) def longest_customer_name(list_of_names): # Find the longest customer name, and how long it is # (maybe so we can display the names nicely later) 
longest_len = 0 longest_name = "" for n in list_of_names: if len(n) > longest_len: longest_len = len(n) longest_name = n return longest_name, longest_len def count_matches(to_match, my_list): # Counts how many times to_match appears in my_list count = 0 for m in my_list: if to_match == m: count += 1 return count def count_longest_name(list_of_names): # Count how many times the longest name appears in the list # Makes use of functions defined above word, length = longest_customer_name(list_of_names) return count_matches(word,list_of_names) count_longest_name(['Alice','Bob','Catherine','Catherine']) def all_names_short_enough1(names, limit): for name in names: if len(name) > limit: return False return True print(all_names_short_enough1(['Alice', 'Bob'], 3)) print(all_names_short_enough1(['Alice', 'Bob'], 5)) def all_names_short_enough2(names, limit): name, length = longest_customer_name(names) return length <= limit print(all_names_short_enough2(['Alice', 'Bob'], 3)) print(all_names_short_enough2(['Alice', 'Bob'], 5)) def add5(arg): b = arg + 5 return b add5(7) # Return 12 def pattern_a(price, tax): return price * (1 + 0.01 * tax) # Everything we need is in the arguments - good tax = 20 # Global variable - this is worse style def pattern_b(price): return price * (1 + 0.01 * tax) # Works, but less flexible, hard to debug print(pattern_a(100,20)) print(pattern_b(100)) def add_two(my_number): a = my_number + 2 # Shadows outer "a", now we have two a's and see this one print("a is " + str(a) + " inside add_two") return a a = 5 print("add_two(2) is " + str(add_two(2))) print("a is " + str(a) + " outside add_two") my_list = ['a','b','c'] def concatenate_all(my_list): out = '' for item in my_list: out += item return out print(concatenate_all(['d','e'])) # ['d','e'] is called my_list in the function print(concatenate_all(my_list)) # my_list is still a,b,c names = ["Catherine", "Donovan", "alice", "BOB"] standardized_names = [] for name in names: name = name.capitalize() # 
Capitalize first letter, lc others standardized_names.append(name) standardized_names.sort() jobs = ['Pilot', 'teacheR', 'firefighter', 'LIBRARIAN'] standardized_jobs = [] for job in jobs: job = job.capitalize() standardized_jobs.append(job) standardized_jobs.sort() print(standardized_names) print(standardized_jobs) names = ["Catherine", "Donovan", "alice", "BOB"] jobs = ['Pilot', 'teacheR', 'firefighter', 'LIBRARIAN'] def standardize_strings(string_list): out = [] for s in string_list: s = s.capitalize() out.append(s) out.sort() return out standard_names = standardize_strings(names) standard_jobs = standardize_strings(jobs) print(standard_names) print(standard_jobs) def get_first_letter(word): """ Returns the first letter of a string. word (str): The string to get the letter from. A simple function just for demo purposes. Probably not useful since get_first_letter takes more characters to type than string[0]. """ return word[0] get_ipython().run_line_magic('pinfo', 'get_first_letter') print(get_first_letter("Shibboleth") == "S") print(pattern_a(100,20) == 120) print(pattern_a(0, 20) == 0) print(count_matches("A",[]) == 0) print(count_matches("A", ["A","A","A"]) == 3) # Lecture10Hashes.py my_menu_dict = { "Salmon": 25, "Steak": 30, "Mac and cheese" : 18 } print(my_menu_dict["Salmon"]) my_menu_dict = {} # empty dictionary my_menu_dict["Salmon"] = 25 my_menu_dict["Steak"] = 30 my_menu_dict["Mac and cheese"] = 18 print(my_menu_dict["Salmon"]) my_dict = {} my_dict.get('sushi', 0) two_cities = """It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of light, it was the season of darkness, it was the spring of hope, it was the winter of despair.""" worddict = {} wordlist = two_cities.split() for word in wordlist: if word in worddict: # Check for presence of key worddict[word] += 1 else: worddict[word] = 1 print(worddict["age"]) 
# NOTE(review): line breaks and indentation in this section were
# reconstructed from a whitespace-collapsed export; confirm against the
# original notebook.
print(worddict["of"])

# Iterate over (key, value) pairs of the word-count dictionary.
for word, count in worddict.items():
    print(word + ":" + str(count))


def word_prob(word, worddict):
    """
    Returns the relative frequency of a word in a word-count dictionary.

    word (str): The word to look up.
    worddict (dict): Maps each word to the number of times it was seen.

    Returns a float: the word's count divided by the sum of all counts.
    A word that is not in the dictionary has count 0, so the result is 0.0.
    An empty dictionary (or one whose counts sum to 0) also gives 0.0
    instead of raising ZeroDivisionError.
    """
    numerator = worddict.get(word, 0)
    # Summing the values directly avoids a loop variable that would
    # shadow the "word" parameter.
    denominator = sum(worddict.values())
    if denominator == 0:
        return 0.0
    return numerator / denominator


print(word_prob('winter', worddict)) # Should be 1/60 = 0.0167 or so
print(word_prob('season', worddict)) # Should be 2/60 = 0.0333 or so
print(word_prob('Pokemon', worddict)) # Should be 0 with no errors

# Sets: unordered collections of unique, hashable items.
bigIPs = {"209.85.231.104", "207.46.170.123", "72.30.2.43"}
bigIPs.add("208.80.152.2")
len(bigIPs)

newset = set()  # {} would create an empty dict, so use set()
newset.add("First item")
print("First item" in newset)

# Membership-test comparison. Each collection holds 123,456,789 integers,
# so building them takes noticeable time and memory.
myset = set(range(123456789)) # {0, 1, 2, ...}
mylist = list(range(123456789)) # [0, 1, 2, ...]
12345678 in myset # Fast, uses hash
12345678 in mylist # Slower, check each item

two_cities_extended = """It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way--in short, the period was so far like the present period that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only. There were a king with a large jaw and a queen with a plain face, on the throne of England; there were a king with a large jaw and a queen with a fair face, on the throne of France. In both countries it was clearer than crystal to the lords of the State preserves of loaves and fishes, that things in general were settled for ever. It was the year of Our Lord one thousand seven hundred and seventy-five. Spiritual revelations were conceded to England at that favoured period, as at this. Mrs. 
Southcott had recently attained her five-and-twentieth blessed birthday, of whom a prophetic private in the Life Guards had heralded the sublime appearance by announcing that arrangements were made for the swallowing up of London and Westminster. Even the Cock-lane ghost had been laid only a round dozen of years, after rapping out its messages, as the spirits of this very year last past (supernaturally deficient in originality) rapped out theirs. Mere messages in the earthly order of events had lately come to the English Crown and People, from a congress of British subjects in America: which, strange to relate, have proved more important to the human race than any communications yet received through any of the chickens of the Cock-lane brood. """

wordlist = two_cities_extended.split()


def find_by_list(wordlist):
    """
    Looks up every word of wordlist in wordlist itself.

    Exists only to be timed: each "in" test on a list scans the list.
    Returns None.
    """
    for word in wordlist:
        if word in wordlist:
            continue # Move on to next loop


# get_ipython() is only defined when running under IPython/Jupyter.
get_ipython().run_line_magic('time', 'find_by_list(wordlist)')

# Build word counts so the same lookups can be done against a dict.
worddict = {}
for word in wordlist:
    if word in worddict:
        worddict[word] += 1
    else:
        worddict[word] = 1


def find_by_dict(wordlist, dict):
    """
    Looks up every word of wordlist in the given dictionary.

    Exists only to be timed against find_by_list: "in" on a dict is a
    hash lookup. Returns None.

    Note that the parameter name "dict" hides the built-in dict type
    inside this function.
    """
    for word in wordlist:
        if word in dict:
            continue # Move on to next iteration of the for loop


get_ipython().run_line_magic('time', 'find_by_dict(wordlist,worddict)')

# Assignment does not copy a dictionary: both names refer to the same object.
mydict = {"a":1000}
dict2 = mydict # gets the address, so any changes are permanent to the original
dict2["b"] = 500
print(mydict)
print(dict2)

# .copy() makes a new dictionary, so dict2 does not see the "c" entry.
dict3 = dict2.copy()
dict3["c"] = 40
print(dict2)
print(dict3)

from string import ascii_lowercase

# Collect every distinct character of the text, lowercased.
myset = set()
for character in two_cities_extended:
    myset.add(character.lower())


def checkletters(myset):
    """
    Checks whether every lowercase ASCII letter is present in myset.

    myset (set): The set of characters to check.

    Prints the first missing letter and returns False, or prints
    "All found" and returns True.
    """
    for c in ascii_lowercase:
        # TODO check whether this letter appeared in myset, maybe return a value
        if c not in myset:
            print("Missing: " + c)
            return False
    print("All found")
    return True


checkletters(myset)


# Lecture11and12NumpyMatplotlib.py
import numpy as np

v = np.array([1, 2 ,3])
print(v)

A = np.array([[1, 0, 0], [0 ,2, 0], [0, 0, 3]]) # 3x3 with 1,2,3 along the diagonal
print(A)

print(A.shape) # Tuples: like lists, but use () instead of []
print(v.shape) # 1d outputs a comma to indicate it's still a tuple

# Arithmetic between arrays of the same shape is element-by-element.
v1 = v
print(v1)
v2 = np.array([4, 5, 6])
print(v2)
print("Adding 1D arrays: ", v1 + v2)
print("Subtracting 1D arrays: ", v1 - v2)
print("Multiplying 1D arrays: ", v1 * v2)
print("Dividing 1D arrays: ", v1 / v2)

# Arithmetic with a constant applies the constant to every element.
print(v1)
print("Adding by a constant: ", v1 + 2)
print("Subtracting by a constant: ", v1 - 2)
print("Multiplying by a constant: ", v1 * 2)
print("Dividing by a constant: ", v1 / 2)

# Reductions along an axis of a 2-D array.
my_array = np.array([[1,2,3], [4,5,6]])
print(np.min(my_array, axis=0))
print(np.mean(my_array, axis=1))

# The @ operator is matrix multiplication.
B = np.array([[3, 2], [4, -1]])
w = np.array([1, -1])
z = B @ w
print(z)

# Slicing a 1-D array.
my_array = np.array([8, 6, 7, 5, 3, 0, 9])
print(my_array[1:3]) # prints index 1 and 2, not 3
print(my_array)
print(my_array[1:])
my_array[:3]

# Slicing a 2-D array: rows first, then columns.
my_matrix = np.array([[42.3, 71.1, 92], [40.7, 70.0, 85], [47.6, 122.0, 82]])
print(my_matrix)
two_by_two_square = my_matrix[1:, :2]
print(two_by_two_square)
no_last_column = my_matrix[:, :2] # no temperature
print(no_last_column)

import numpy as np

a = np.array([0, 1, 2, 3, 4, 5])
print(a)
b = a[1:3]
print(b)
b[1] = 100 # modify the slice...
print(a) # ...and see the original change print(np.zeros(3)) #create an array of zeros with length 3 print(np.zeros((2, 3))) # create a 2x3 matrix of zeros import matplotlib.pyplot as plt x = [1, 2, 3] y = [1, 4, 9] plt.plot(x, y) plt.show() import numpy as np my_points = np.array([[2, 1], [3, 4], [5, 6]]) # Each list is a point print(my_points) plt.plot(my_points[:, 0], my_points[:,1]) # Slice to get x values separate from y values plt.show() plt.plot(my_points[:, 0], my_points[:, 1], 'ro') # 'r' is for red, 'o' is for circles plt.show() distances_millions_miles = [35, 67, 93, 142, 484, 889, 1790, 2880] plt.plot(np.arange(1, 9), distances_millions_miles, 'o') plt.show() np.arange(1,9) xpoints = np.linspace(0, 10, 100) ypoints = xpoints ** 2 + 1 plt.plot(xpoints, ypoints) plt.show() plt.plot(my_points[:, 0], my_points[:, 1], 'ro') myfit_x = np.linspace(1, 5, 100) myfit_y = np.linspace(1.5, 5.5, 100) # Same y/x slope for all segments - so, a line plt.plot(myfit_x,myfit_y) plt.show() import matplotlib.pyplot as plt x = [1, 2, 3] y1 = [1, 2, 3] y2 = [3, 2, 1] plt.plot(x, y1, label='Sales') plt.plot(x, y2, label='Quality') plt.legend() plt.title('Trends') plt.grid(True) customers = ['Oliver', 'Sophia', 'Liam', 'Arielle', 'Noah'] total_purchases = [56, 73, 24, 48, 88] plt.bar(customers, total_purchases) plt.xlabel("Customer name", fontsize=14) plt.ylabel("Total purchases", fontsize=14) plt.title("Total purchases for 5 Amazon customers", fontsize=16) plt.tick_params(axis='x', labelsize=14) plt.tick_params(axis='y', labelsize=14) plt.show() # Lecture13BiggerPrograms.py """ Compute f-measure for each item in a list. Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn) (these stand for true positive, false positive, etc) Returns: a list of floats, the f-measures. """ """ Compute the f-measure, a performance measure that ignores true negatives. 
Arguments: tp (int): the count of true positives fp (int): the count of false negatives tn (int): the count of true negatives fn (int): the count of false negatives Returns: a float, the f-measure. """ def f_measures(stats_list): """ Compute f-measure for each item in a list. Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn) (these stand for true positive, false positive, etc) Returns: a list of floats, the f-measures. """ for tp, fp, tn, fn in stats_list: f = f_measure(tp, fp, tn, fn) def f_measures(stats_list): """ Compute f-measure for each item in a list. Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn) (these stand for true positive, false positive, etc) Returns: a list of floats, the f-measures. """ out = [] for tp, fp, tn, fn in stats_list: f = f_measure(tp, fp, tn, fn) out.append(f) return f def f_measure(tp, fp, tn, fn): """ Compute the f-measure, a performance measure that ignores true negatives. Arguments: tp (int): the count of true positives fp (int): the count of false negatives tn (int): the count of true negatives fn (int): the count of false negatives Returns: a float, the f-measure. """ precision = tp/(tp + fp) recall = tp/(tp + fn) return (2 * precision * recall)/(precision + recall) def f_measure(precision, recall): """ Compute the f-measure, a performance measure that ignores true negatives. Arguments: precision (float): proportion of positive classifications that are correct recall (float): proportion of positive examples that were found Returns: a float, the f-measure. """ return (2 * precision * recall)/(precision + recall) def precision(tp, fp): return tp/(tp + fp) def recall(tp, fn): tp/(tp + fn) def f_measures(stats_list): """ Compute f-measure for each item in a list. Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn) (these stand for true positive, false positive, etc) Returns: a list of floats, the f-measures. 
""" out = [] for tp, fp, tn, fn in stats_list: f = f_measure(precision(tp, fp), recall(tp, fn)) out.append(f) return f print(precision(4,4)) # Expect 0.5 print(recall(4,4)) # Expect 0.5 print(f_measure(1, 1)) # Expect 1 def recall(tp, fn): print(tp/(tp + fn)) recall(4,4) def recall(tp, fn): print(tp/(tp + fn)) return tp/(tp + fn) recall(4,4) def f_measure(precision, recall): """ Compute the f-measure, a performance measure that ignores true negatives. Arguments: precision (float): proportion of positive classifications that are correct recall (float): proportion of positive examples that were found Returns: a float, the f-measure. """ return (2 * precision * recall)/(precision + recall) def precision(tp, fp): return tp/(tp + fp) def recall(tp, fn): return tp/(tp + fn) def f_measures(stats_list): """ Compute f-measure for each item in a list. Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn) (these stand for true positive, false positive, etc) Returns: a list of floats, the f-measures. """ out = [] for tp, fp, tn, fn in stats_list: f = f_measure(precision(tp, fp), recall(tp, fn)) out.append(f) return f print(precision(4,4)) # Expect 0.5 print(recall(4,4)) # Expect 0.5 print(f_measure(1, 1)) # Expect 1 print(precision(0, 4)) # Expect 0 print(precision(0, 0)) # Expect ... oh, I guess we didn't think about this. 0? print(precision(4, 0)) # Expect 1 print(recall(0, 4)) # Expect 0 print(recall(0, 0)) # Similarly to precision, let's return 0 print(recall(4, 0)) # Expect 1 print(f_measure(0, 0)) # Expect 0 print(f_measure(0.5, 0.5)) # Expect 0.5 def f_measure(precision, recall): """ Compute the f-measure, a performance measure that ignores true negatives. Arguments: precision (float): proportion of positive classifications that are correct recall (float): proportion of positive examples that were found Returns: a float, the f-measure. 
""" return (2 * precision * recall)/(precision + recall) def precision(tp, fp): if tp + fp == 0: return 0 return tp/(tp + fp) def recall(tp, fn): if tp + fn == 0: return 0 return tp/(tp + fn) def f_measures(stats_list): """ Compute f-measure for each item in a list. Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn) (these stand for true positive, false positive, etc) Returns: a list of floats, the f-measures. """ out = [] for tp, fp, tn, fn in stats_list: f = f_measure(precision(tp, fp), recall(tp, fn)) out.append(f) return f print(precision(4,4)) # Expect 0.5 print(recall(4,4)) # Expect 0.5 print(f_measure(1, 1)) # Expect 1 print(precision(0, 4)) # Expect 0 print(precision(0, 0)) # Expect 0 print(precision(4, 0)) # Expect 1 print(recall(0, 4)) # Expect 0 print(recall(0, 0)) # Similarly to precision, let's return 0 print(recall(4, 0)) # Expect 1 print(f_measure(0, 0)) # Expect 0 print(f_measure(0.5, 0.5)) # Expect 0.5 def f_measure(precision, recall): """ Compute the f-measure, a performance measure that ignores true negatives. Arguments: precision (float): proportion of positive classifications that are correct recall (float): proportion of positive examples that were found Returns: a float, the f-measure. """ if precision + recall == 0: return 0 return (2 * precision * recall)/(precision + recall) def precision(tp, fp): if tp + fp == 0: return 0 return tp/(tp + fp) def recall(tp, fn): if tp + fn == 0: return 0 return tp/(tp + fn) def f_measures(stats_list): """ Compute f-measure for each item in a list. Argument: stats_list (list): a list of tuples of four ints, (tp, fp, tn, fn) (these stand for true positive, false positive, etc) Returns: a list of floats, the f-measures. 
""" out = [] for tp, fp, tn, fn in stats_list: f = f_measure(precision(tp, fp), recall(tp, fn)) out.append(f) return f print(precision(4,4)) # Expect 0.5 print(recall(4,4)) # Expect 0.5 print(f_measure(1, 1)) # Expect 1 print(precision(0, 4)) # Expect 0 print(precision(0, 0)) # Expect 0 print(precision(4, 0)) # Expect 1 print(recall(0, 4)) # Expect 0 print(recall(0, 0)) # Similarly to precision, let's return 0 print(recall(4, 0)) # Expect 1 print(f_measure(0, 0)) # Expect 0 print(f_measure(0.5, 0.5)) # Expect 0.5 # Lecture14Pandas.py import pandas as pd import numpy as np s1 = pd.Series([-3, -1, 1, 3, 5]) print(s1) print(s1.index) s1[:2] # First 2 elements print(s1[[2,1,0]]) # Elements out of order type(s1) s1[s1 > 0] s2 = pd.Series(np.random.rand(5), index=['a', 'b', 'c', 'd', 'e']) print(s2) print(s2.index) print(s2['a']) data = {'pi': 3.14159, 'e': 2.71828} # dictionary print(data) s3 = pd.Series(data) print(s3) my_array = s3.values print(my_array) import numpy as np my_data = np.array([[5, 5, 4], [2, 3, 4]]) hotels = pd.DataFrame(my_data, index = ["Alice rating", "Bob rating"], columns = ["Hilton", "Marriott", "Four Seasons"]) hotels from google.colab import files uploaded = files.upload() # pick starbucks_drinkMenu_expanded.csv get_ipython().system('ls') import pandas as pd df = pd.read_csv('starbucks_drinkMenu_expanded.csv', index_col = 'Beverage') df.head() sorted_df = df.sort_values(by = "Calories", ascending=False) sorted_df.head() hotels hotels['Hilton'] sum = 0 for i in hotels['Hilton']: sum += i print('Average Hilton Rating: ' + str(sum/len(hotels['Hilton']))) hotels.loc['Bob rating'] hotels.loc['Bob rating', 'Marriott'] hotels.iloc[1, 1] print(hotels.iloc[0, 1:2]) print(hotels.loc['Bob rating', ['Marriott', 'Hilton']]) (df['Calories'] > 300) df[df['Calories'] > 300].head() df[(df['Calories'] > 300) & (df['Beverage_prep'] == 'Soymilk')].head() df['bad_fat'] = df['Trans_Fat_g'] + df['Saturated_Fat_g'] df.head() size_ounces_dict = {'Short': 8, 'Tall': 12, 
'Grande': 16, 'Venti': 20} ounces_list = [] for drink in df['Beverage_prep']: ounces_list.append(size_ounces_dict.get(drink, -1)) df['ounces'] = ounces_list df.head() def size_to_ml(size_name): size_ounces_dict = {'Short': 8, 'Tall': 12, 'Grande': 16, 'Venti': 20} return size_ounces_dict.get(size_name,0) * 29.5735 ml = df['Beverage_prep'].map(size_to_ml) print(ml) # Lecture15Pandas.py import pandas as pd df = pd.read_csv('starbucks_drinkMenu_expanded.csv', index_col = 'Beverage') df.head() print(df.loc[:, "Protein_g"].mean()) print(df.loc[:, "Protein_g"].max()) print(df.loc[:, "Protein_g"].idxmax()) # "argmax," gives index with biggest value df.describe() df.corr(numeric_only=True) # New to pandas 2.0.0: chokes on strings without added arg df.columns df.dtypes string = 'string' string[:-1] df['Vitamin_A'] = df['Vitamin_A'].str[0:-1] # Remove the % at the end df['Vitamin_A'] df['Vitamin_A'] = pd.to_numeric(df['Vitamin_A']) df.dtypes df['Vitamin_A'] = df['Vitamin_A'].astype('float64') df.dtypes df.corr(numeric_only=True) df.isnull().sum() df = df.dropna(axis=0, how="any") # Remove the offending row df.isnull().sum() calorie_max = 0 best_name = "" for index, row in df.iterrows(): if row['Calories'] > calorie_max: calorie_max = row['Calories'] best_name = index print(best_name) protein = df.loc[:, "Protein_g"] protein.hist(bins=20); # Create a histogram with 20 equally spaced bins for the data subplot = df[["Protein_g", "Vitamin_A"]] # Notice another way to get desired columns subplot.boxplot(); # Boxplots give median value, middle 50% of data, and range of non-outliers from google.colab import files uploaded = files.upload() # pick titanic.csv df = pd.read_csv('titanic.csv', index_col = 'PassengerId') df.head() df.columns df.dtypes df.describe() df.corr(numeric_only=True) males = df[df['Sex'] == 'male'] males.head() males.describe() females = df[df['Sex'] == 'female'] females.describe() df['sex_numeric'] = df['Sex'] == 'female' df.corr(numeric_only=True) third_class = 
df[df['Pclass'] == 3] second_class = df[df['Pclass'] == 2] first_class = df[df['Pclass'] == 1] third_class['Survived'].hist(); second_class['Survived'].hist(); first_class['Survived'].hist(); # Lecture16Strings.py my_cost = 12.95821 print(f'The total cost was {my_cost} dollars') print(f'The total cost was {my_cost:.2f} dollars') groceries = "milk,eggs,yogurt" grocerieslist = groceries.split(',') print(grocerieslist) ','.join(['milk', 'eggs', 'yogurt']) ' milk,eggs,yogurt '.strip() lines = "SERVANT: Sir, there are ten thousand--\nMACBETH: Geese, villain?" linelist = lines.splitlines() # A shortcut for split('\n') for line in linelist: if line.startswith("MACBETH"): print(line.split(": ")[1]) print('Wow\n\twow!') print("foo" in "food") print("foodfood".replace("foo", "ra")) import numpy as np import pandas as pd my_data = np.array([["Excellent", " Okay ", " Okay"], ["Great ", " Good", " Good"]]) df = pd.DataFrame(my_data, columns = ["Hilton", "Marriott", "Four Seasons"], index = ["Alice", "Bob"]) df marriott = df['Marriott'] for s in marriott: print(s) print('---') for s in marriott.str.strip(): print(s) # Look, no extra whitespace marriott.str.match("\s*Okay\s*") import re pattern = '02143' longstring = 'Somerville, MA 02143' result = re.search(pattern, longstring) if result: # (if it's not None) print(result.group()) longstring = '0132428190214200' pattern2 = '02143' result2 = re.search(pattern2, longstring) print(result2) pattern3 = '\d\d\d\d\d' longstring = 'Somerville, MA 02143' result3 = re.search(pattern3, longstring) if result3: print(result3.group()) longstring = 'My phone number is 5555555' pattern4 = 'phone number is \d+' result4 = re.search(pattern4, longstring) if result4: print(result4.group()) longstring = 'Call me at 555-5555' pattern5 = '\d\d\d-?\d\d\d\d' result5 = re.search(pattern5, longstring) if result5: print(result5.group()) longstring = "Call me at 1-800-555-5555." 
pattern = r"(\d-)?(\d\d\d-)?\d\d\d-?\d\d\d\d"
result = re.search(pattern, longstring)
if result:
    print(result.group())
longstring2 = "Call me at 555-5555."
result = re.search(pattern, longstring2)
if result:
    print(result.group())

pattern = "Somerville, (MA|NJ)"
longstring = "Somerville, NJ 02143"
result = re.search(pattern, longstring)
if result:
    print(result.group())

longstring = "States with a Somerville: AL, IN, ME, MA, NJ, OH, TN, TX"
pattern = "[A-Z][A-Z]"  # Get capital letters within A-Z range
result = re.findall(pattern, longstring)
print(result)

longstring = "The stock NVDA went down 4.54 points"
pattern = r"stock (\w+) went down (\d+\.\d+) points"
result = re.search(pattern, longstring)
if result:
    print(result.group())
    print(result.group(1))  # Subgroup 1, the first () in the pattern
    print(result.group(2))

import re

longstring = "We paid $100 for those shoes"
pattern = r'\$\d+'
result = re.search(pattern, longstring)
print(result.group())

# Lecture18Objects.py


class Car:
    pass


car1 = Car()
car2 = Car()
car3 = Car()

print(isinstance(car1,Car))

car1.year = 2010
car1.make = "Honda"
car1.model = "Fit"
car1.color = "blue"

car2.year = 2013
car2.make = "Toyota"
car2.model = "Camry"
car2.color = "silver"

print(f"This car is a {car1.year} {car1.color} {car1.make} {car1.model}")

my_car = (2010, 'Honda', 'Fit', 'blue')
print(f"This car is a {my_car[0]} {my_car[3]} {my_car[1]} {my_car[2]}")


class Car:
    def print_facts(self):
        """Print the year, color, make and model set on this instance."""
        print(f"This car is a {self.year} {self.color} {self.make} {self.model}")


car1 = Car()
car2 = Car()

car1.year = 2010
car1.make = "Honda"
car1.model = "Fit"
car1.color = "blue"

car2.year = 2013
car2.make = "Toyota"
car2.model = "Camry"
car2.color = "silver"

car1.print_facts()
car2.print_facts()


class Car:
    def __init__(self, year, make, model, color):
        # It's common for the constructor's arguments
        # to have similar or identical names to the attributes they set
        # (but we still have to say one should be set to the other)
        self.year = year
        self.make = make
        self.model = model
        self.color = color

    def print_facts(self):
        """Print the year, color, make and model of this car."""
        print(f"This car is a {self.year} {self.color} {self.make} {self.model}")


car1 = Car(2010, "Honda", "Fit", "blue")
car2 = Car(2013, "Toyota", "Camry", "silver")
car1.print_facts()
car2.print_facts()


def newest_car(list_of_cars):
    """Return the Car with the largest year, or None for an empty list."""
    if not list_of_cars:  # ie, empty list
        return None
    best_year = list_of_cars[0].year
    best_car = list_of_cars[0]
    for car in list_of_cars:
        # This warning message could prevent a bug if we try
        # to hand this function the wrong list
        if not isinstance(car, Car):
            print('Warning, list had non-car items!')
        elif car.year > best_year:
            best_year = car.year
            best_car = car
    return best_car


newest_car([car1, car2]).print_facts()


class Bill:
    """
    Represents a bill at a restaurant.

    _items (list of tuples): list of (item name, cost) tuples
    """

    def __init__(self, items):
        self._items = items

    # "Getter"
    def items(self):
        return self._items

    # "Setter"
    def set_items(self, items):
        self._items = items

    def total_cost_pretax(self):
        total = 0
        for name, cost in self._items:
            total += cost
        return total

    def total_cost_with_tax(self, tax_rate):
        return round(self.total_cost_pretax() * (1 + tax_rate), 2)


my_lunch = [("Ham Sandwich", 9), ("Coke", 2)]
new_bill = Bill(my_lunch)
cost_with_tax = new_bill.total_cost_with_tax(0.08)
print(f"Total cost: {cost_with_tax}")

new_bill.items()  # could have said new_bill._items, but we were told not to


class Bill:
    """
    Represents a bill at a restaurant.

    _item_names (list of strings): list of items on bill
    _item_costs (list of ints): list of prices of items on bill
    _items is not here anymore! sorry anybody who wrote code that uses it, we warned you!
    """

    def __init__(self, items):
        self._item_names = [item[0] for item in items]
        self._item_costs = [item[1] for item in items]

    # "Getter"
    def items(self):
        # list(zip(a, b)) returns a list of tuples combining a and b
        return list(zip(self._item_names, self._item_costs))

    # "Setter"
    def set_items(self, items):
        self._item_names = [item[0] for item in items]
        self._item_costs = [item[1] for item in items]

    def total_cost_pretax(self):
        # Use the new storage: _items no longer exists in this version.
        total = 0
        for cost in self._item_costs:
            total += cost
        return total

    # Notice that we can call another method with this one
    def total_cost_with_tax(self, tax_rate):
        return round(self.total_cost_pretax() * (1 + tax_rate), 2)


my_lunch = [("Ham Sandwich", 9), ("Coke", 2)]
new_bill = Bill(my_lunch)
print(new_bill.items())  # this still works, but _items would have broken


class Circle:
    def __init__(self, radius):
        if radius < 0:
            raise ValueError("Can't have negative circle radius")
        self.radius=radius


Circle(-1)  # Raises ValueError: the radius is negative


class Circle2:
    def __init__(self,radius=2):
        self.radius = radius


Circle2().radius


class Student:
    def __init__(self, age, major, year):
        self.age = age
        self.major = major
        self.year = year

    def get_older(self, amount):
        self.age += amount


bob = Student(20,"Biology","Sophomore")
bob.get_older(2)
print(bob.age)

car1 = Car(2010, "Honda", "Fit", "blue")
car2 = car1
car2.color = "black"
car1.print_facts()  # It's black now
car2.print_facts()

import copy
car2 = copy.copy(car1)
car2.color = "white"
car1.print_facts()
car2.print_facts()

from google.colab import files
uploaded = files.upload()  # import books.csv

import pandas as pd
df = pd.read_csv('books.csv', index_col = 'title')
df.head()


class Book:
    def __init__(self, title, author, average_rating):
        self.title = title
        self.author = author
        self.average_rating = average_rating
        # Could add more fields from the dataset if desired


class Publisher:
    def __init__(self, df, publisher_name):
        """Collect a Book for every row of df whose publisher matches."""
        self.name = publisher_name
        self.books = []
        for row in df.itertuples():
            if row.publisher == publisher_name:
                self.books.append(Book(row.Index, row.authors, row.average_rating))

    def average_rating(self):
        """Return the mean rating of this publisher's books.

        Raises ZeroDivisionError if no books were collected.
        """
        total = 0
        for book in self.books:
            total += book.average_rating
        return total/len(self.books)


scholastic = Publisher(df,'Scholastic Inc.')
scholastic.average_rating()

# Lecture19MoreOO.py


class Client:  # both Faculty and Students
    def __init__(self, birthyear, uid):
        self.birthyear = birthyear
        self.uid = uid

    def get_uid(self):
        return self.uid

    def get_birthyear(self):
        return self.birthyear


class Student(Client):  # inherit from Client
    def __init__(self, birthyear, uid, gradyear):
        self.birthyear = birthyear
        self.uid = uid
        self.gradyear = gradyear

    def get_gradyear(self):
        return self.gradyear


class Faculty(Client):
    pass  # Nothing else we want to do for Faculty


alice = Student(2003, 123456789, 2024)
print(alice.get_birthyear())  # Inherited from Client
print(alice.get_uid())  # Inherited from Client
print(alice.get_gradyear())  # Specific to Student

person1 = Student(2000,123456,2025)
if not isinstance(person1, Faculty):
    print("Hey, this person doesn't have permission to do this!")
else:
    print("Welcome, Faculty number " + str(person1.uid) + "!")

student1 = Student(2000,123456,2025)
print(isinstance(student1,Student))
print(isinstance(student1,Client))
print(isinstance(student1,object))  # Every class inherits from object


class Student(Client):  # inherit from Client
    def __init__(self, birthyear, uid, gradyear):
        super().__init__(birthyear, uid)
        self.gradyear = gradyear

    def get_gradyear(self):
        return self.gradyear


bob = Student(2002,987654321,2022)
print(bob.get_uid())  # inherited from Client


class Trip:
    def __init__(self,cost,start_date,end_date):
        self.cost = cost
        self.start_date = start_date
        self.end_date = end_date
        self.reimbursed = False

    # NOTE(review): __init__ stores an instance attribute named `cost`, so on an
    # instance `obj.cost` is the stored value and this method is not reachable.
    def cost(self):
        return self.cost

    def reimburse(self):
        self.reimbursed = True

    def dates(self):
        return self.start_date, self.end_date


class EquipmentOrder:
    def __init__(self,cost,domestic_seller):
        self.cost = cost
        self.reimbursed = False
        self.domestic_seller = domestic_seller

    # NOTE(review): shadowed by the instance attribute of the same name.
    def cost(self):
        return self.cost

    def reimburse(self):
        self.reimbursed = True

    # NOTE(review): shadowed by the instance attribute of the same name.
    def domestic_seller(self):
        return self.domestic_seller


class Expense:
    def __init__(self,cost):
        self.cost = cost
        self.reimbursed = False

    # NOTE(review): shadowed by the instance attribute of the same name.
    def cost(self):
        return self.cost

    def reimburse(self):
        self.reimbursed = True


class Trip(Expense):
    def __init__(self,cost,start_date,end_date):
        super().__init__(cost)
        self.start_date = start_date
        self.end_date = end_date

    # inherit cost, reimburse
    def dates(self):
        return self.start_date, self.end_date


class EquipmentOrder(Expense):
    def __init__(self,cost,domestic_seller):
        super().__init__(cost)
        self.domestic_seller = domestic_seller

    # inherit cost, reimburse
    # NOTE(review): shadowed by the instance attribute of the same name.
    def domestic_seller(self):
        return self.domestic_seller


class Employee:
    def __init__(self, name, salary, title, years_of_service):
        self.name = name
        self.salary = salary
        self.title = title
        self.years_of_service = years_of_service

    def give_raise(self, raise_amount):
        self.salary += raise_amount

    def change_title(self, new_title):
        self.title = new_title

    def update_years_of_service(self, increase):
        self.years_of_service += increase


class Contractor:
    def __init__(self, name, salary, contract_duration):
        self.name = name
        self.salary = salary
        self.contract_duration = contract_duration

    def give_raise(self, raise_amount):
        self.salary += raise_amount


alice = Employee("Alice", 90000, "Manager", 7)
alice.give_raise(10000)
print(alice.salary)
bob = Contractor("Bob", 80000, 2)
bob.give_raise(10000)
print(bob.salary)


class Worker:
    def __init__(self, name, salary):
        self.name = name
        self.salary = salary

    def give_raise(self, raise_amount):
        self.salary += raise_amount


class Employee(Worker):
    def __init__(self, name, salary, title, years_of_service):
        super().__init__(name, salary)
        self.title = title
        self.years_of_service = years_of_service

    def change_title(self, new_title):
        self.title = new_title

    def update_years_of_service(self, increase):
        self.years_of_service += increase


class Contractor(Worker):
    def __init__(self, name, salary, contract_duration):
        super().__init__(name, salary)
        self.contract_duration = contract_duration


alice = Employee("Alice", 90000, "Manager", 7)
alice.give_raise(10000)
print(alice.salary)
bob = Contractor("Bob", 80000, 2)
bob.give_raise(10000)
print(bob.salary)


class Gradyear:
    def __init__(self, year):
        self.year = year


year = Gradyear(2024)
print(year)


class Gradyear:
    def __init__(self, year):
        self.year = year

    def __str__(self):  # Our own implementation
        return str(self.year)


gradyear = Gradyear(2024)
print(gradyear)

gy1 = Gradyear(2024)
gy2 = Gradyear(2024)
print(gy1 == gy2)

myset = set()
myset.add(gy1)
myset.add(gy2)
len(myset)


class Gradyear:
    def __init__(self, year):
        self.year = year

    def __str__(self):  # Our own implementation
        return str(self.year)

    def __eq__(self, other):
        # Defer to the other operand instead of failing on objects without .year
        if not isinstance(other, Gradyear):
            return NotImplemented
        return self.year == other.year

    def __hash__(self):
        return self.year  # Just store by number itself


gy1 = Gradyear(2024)
gy2 = Gradyear(2024)
print(gy1 == gy2)

myset = set()
myset.add(gy1)
myset.add(gy2)
len(myset)

# Lecture20Recursion.py


def bad_recursion():
    # No base case: every call makes another call.
    print("Bad!")
    bad_recursion()


bad_recursion()  # Ends only when Python raises RecursionError


def factorial(n):
    # Omitting checks to make sure we're a natural number, etc
    if n == 1:
        return 1
    return n * factorial(n-1)


print(factorial(4))


def factorial(n):
    # Omitting checks to make sure we're a natural number, etc
    print(f'Evaluating {n}!')
    if n == 1:
        print('Returning 1')
        return 1
    result = n * factorial(n-1)
    print(f'Returning {result}')
    return result


print(factorial(4))


def sum_m_to_n(m, n):
    """Return m + (m+1) + ... + n, recursing down from n to m."""
    if n == m:
        return m
    result = n + sum_m_to_n(m, n-1)
    return result


sum_m_to_n(3, 7)  # 3 + 4 + 5 + 6 + 7 = 25


def sum_m_to_n(m, n):
    print(f'Evaluating sum from {m} to {n}')
    if n == m:
        print(f'Returning {m}')
        return m
    result = n + sum_m_to_n(m, n-1)
    print(f'Returning {result}')
    return result


sum_m_to_n(3, 7)  # 3 + 4 + 5 + 6 + 7 = 25


def mypow(a, p):
    """Return a multiplied by itself p times (1 when p is 0)."""
    if p == 0:
        return 1
    result = a * mypow(a, p-1)
    return result


mypow(2,8)


def mypow(a, p):
    print(f'Evaluating {a}^{p}')
    if p == 0:
        print('Returning 1')
        return 1
    result = a * mypow(a, p-1)
    print(f'Returning {result}')
    return result


mypow(2,8)


def fib(n):
    if (n == 0):
        return 0
    if (n == 1):
        return 1
    return fib(n-1) + fib(n-2)


for i in range(10):
    print(fib(i))


def r_perm(r, n):
    """Return the product (r+1) * (r+2) * ... * n."""
    if n == r+1:
        return n
    return n * r_perm(r,n-1)


r_perm(5,7)


def iter_factorial(n):
    running_fact = 1
    for i in range(1,n+1):
        running_fact *= i
    return running_fact


print(iter_factorial(4))

import numpy as np


def iter_fib(n):
    """Return fib(n), filling an array from the bottom up instead of recursing."""
    if n == 0 or n == 1:
        return n
    fibs = np.zeros(n+1)
    fibs[0] = 0
    fibs[1] = 1
    for i in range(2,n+1):
        fibs[i] = fibs[i-1] + fibs[i-2]
    return int(fibs[n])


for i in range(10):
    print(iter_fib(i))


def power_set(setstring):
    """Return a list of every subset of setstring's characters, as strings."""
    if len(setstring) == 0:
        return [""]
    subset_list = []
    # Recursive call gets all the subsets that don't involve the first character
    smaller_power_set = power_set(setstring[1:])
    # The starting character is either in the subset...
    for substring in smaller_power_set:
        subset_list.append(setstring[0] + substring)
    # ...or not.
    for substring in smaller_power_set:
        subset_list.append(substring)
    return subset_list


power_set("abcd")


def recursive_sum(lst):
    if not lst:  # empty list
        return 0
    return lst[0] + recursive_sum(lst[1:])


recursive_sum([1,2,3])


def recursive_filter(min_val, lst):
    """Return the items of lst that are >= min_val, in their original order."""
    if not lst:
        return []
    if lst[0] >= min_val:
        return [lst[0]] + recursive_filter(min_val, lst[1:])
    else:
        return recursive_filter(min_val, lst[1:])


recursive_filter(3, [1, 2, 3, 4, 5])


def recursive_index(item, lst, index):
    # index tracks where we are in the list
    if not lst:
        return None  # not found
    if lst[0] == item:
        return index
    return recursive_index(item,lst[1:],index+1)


recursive_index(5, [0, 1, 2, 5], 0)


def recursive_skiplist(lst):
    """Return every other item of lst, starting with the first."""
    if len(lst) == 0:
        return []
    if len(lst) == 1:
        return lst
    return [lst[0]] + recursive_skiplist(lst[2:])


recursive_skiplist([5,3,7,2,9])

# Lecture21DataStructures.py


class ll_node:
    def __init__(self, num):
        self.number = num
        self.next = None

    def append(self, num):
        if self.next is None:
            # End of the list - add the node
            self.next = ll_node(num)
        else:
            self.next.append(num)  # Recursively append to rest of list

    def contains(self, othernum):
        if self.number == othernum:
            # We found it
            return True
        elif self.next is None:
            # We reached the end, didn't find it
            return False
        # Not here, there's more list - so, keep looking (recursively)
        return self.next.contains(othernum)

    def __str__(self):
        if self.next is None:  # Last number
            return str(self.number)
        # Print this and print the rest (more recursion)
        return str(self.number) + ' ' + str(self.next)


mylist = ll_node(6)
mylist.append(1)
mylist.append(7)
print(mylist)
print('Contains 7: ' + str(mylist.contains(7)))
print('Contains 5: ' + str(mylist.contains(5)))

import numpy as np


class dynamic_array:  # Showing how Python lists work
    def __init__(self, initial_size):
        self.memory = np.zeros(initial_size)
        self.occupied = 0
        self.size = initial_size

    def __str__(self):
        return str(self.memory)

    def append(self, val):
        if self.occupied == self.size:
            print('Resizing...')
            # Double the capacity; max(1, ...) lets an array created with
            # initial_size 0 grow, since 0*2 would stay 0.
            new_size = max(1, self.size*2)
            new_memory = np.zeros(new_size)
            # A "hiccup" in running time as everything's copied
            for i in range(len(self.memory)):
                new_memory[i] = self.memory[i]
            self.memory = new_memory
            self.size = new_size
        print('Adding ' + str(val))
        self.memory[self.occupied] = val
        self.occupied += 1


my_array = dynamic_array(2)
print(my_array)
my_array.append(1)
my_array.append(1)
print(my_array)
my_array.append(1)
print(my_array)
my_array.append(1)
print(my_array)


class FolderTree:  # binary left and right are its fields
    def __init__(self, val):
        self.left = None
        self.right = None
        self.val = val

    def addLeft(self, node):
        self.left = node

    def addRight(self, node):
        self.right = node

    def find(self, v):
        if self.val == v:
            return True
        # "if self.left" is checking that self.left exists -
        # else error when we run self.left.find()
        if self.left and self.left.find(v):
            return True
        if self.right and self.right.find(v):
            return True
        return False


leftleftchild = FolderTree("wow.exe")
leftrightchild = FolderTree("xls.exe")
rightleftchild = FolderTree("lec12.pdf")
rightrightchild = FolderTree("lec14.pdf")
leftparent = FolderTree("apps")
rightparent = FolderTree("lecs")
leftparent.addLeft(leftleftchild)
leftparent.addRight(leftrightchild)
rightparent.addLeft(rightleftchild)
rightparent.addRight(rightrightchild)
root = FolderTree("root")
root.addLeft(leftparent)
root.addRight(rightparent)
print(root.find("wow.exe"))
print(root.find("lec13.exe"))


def count_nodes(tree):
    if tree is None:
        return 0
    return 1 + count_nodes(tree.left) + count_nodes(tree.right)


count_nodes(root)


def calc_depth(tree):
    if tree is None:
        return 0
    if tree.left is None and tree.right is None:
        return 0  # Leaf has depth 0 in its subtree
    return 1 + max(calc_depth(tree.left), calc_depth(tree.right))


calc_depth(root)


class BinarySearchTree:  # binary left and right are its fields
    def __init__(self, val):
        self.left = None
        self.right = None
        self.val = val

    def addLeft(self, node):
        self.left = node

    def addRight(self, node):
        self.right = node

    def find(self, v):
        if self.val == v:
            return True
        if v < self.val:
            if self.left:
                print("Going Left")
                return self.left.find(v)
            else:
                return False
        else:
            if self.right:
                print("Going Right")
                return self.right.find(v)
            else:
                return False


root = BinarySearchTree("m")
leftparent = BinarySearchTree("f")
rightparent = BinarySearchTree("q")
leftleftchild = BinarySearchTree("a")
leftrightchild = BinarySearchTree("h")
rightleftchild = BinarySearchTree("o")
rightrightchild = BinarySearchTree("u")
leftparent.addLeft(leftleftchild)
leftparent.addRight(leftrightchild)
rightparent.addLeft(rightleftchild)
rightparent.addRight(rightrightchild)
root.addLeft(leftparent)
root.addRight(rightparent)
print(root.find("h"))
print(root.find("d"))


class infect_tree:
    # name is a string, infects is a list of infect_tree's infected
    def __init__(self, name, infects):
        self.name = name
        self.infects = infects


jake = infect_tree('jake', [])
eric = infect_tree('eric', [])
fifi = infect_tree('fifi', [])
ged = infect_tree('ged', [])
hao = infect_tree('hao', [])
idris = infect_tree('idris', [jake])
bob = infect_tree('bob', [eric])
che = infect_tree('che', [])
daphne = infect_tree('daphne', [fifi, ged, hao, idris])
alice = infect_tree('alice', [bob, che, daphne])


def find_most_infections(my_tree):
    """Return (name, count) for the node with the longest infects list."""
    best_infects = len(my_tree.infects)
    best_name = my_tree.name
    for infect in my_tree.infects:
        name, infects = find_most_infections(infect)  # Recursion...
        if infects > best_infects:
            best_infects = infects
            best_name = name
    return best_name, best_infects


find_most_infections(alice)


def find_all_descendants(my_tree):
    """Return the names in my_tree's subtree, starting with my_tree's own name."""
    my_list = [my_tree.name]
    for infect in my_tree.infects:
        my_list += find_all_descendants(infect)  # More recursion
    return my_list


find_all_descendants(daphne)

# Lecture22ScikitLearn.py

from sklearn.datasets import load_digits
import matplotlib.pyplot as plt

digits = load_digits()
print(digits.data.shape)  # Examples x 64 pixels

import matplotlib.pyplot as plt
plt.gray()
plt.matshow(digits.images[0])  # Notice images[0] is 2D

from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

from sklearn.neighbors import KNeighborsClassifier

nbrs = KNeighborsClassifier(n_neighbors=3).fit(digits.data, digits.target)
nbrs.score(digits.data, digits.target)  # Find accuracy on the training dataset

from sklearn.model_selection import train_test_split

data_train, data_test, label_train, label_test = train_test_split(digits.data, digits.target, test_size=0.2)
nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)
nbrs.score(data_test,label_test)

print(nbrs.predict(data_test[0:3]))


def reshape_and_show(num, data_test):
    """Reshape row num of data_test to 8x8 and display it with matshow."""
    image = data_test[num].reshape(8,8)
    plt.matshow(image)


reshape_and_show(0,data_test)
reshape_and_show(1,data_test)
reshape_and_show(2,data_test)

from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person = 100)

plt.imshow(faces.images[5], cmap="gray")

data_train, data_test, label_train, label_test = train_test_split(faces.data, faces.target, test_size=0.2)
nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)
nbrs.score(data_test,label_test)

import random

random.seed(110)  # Set seed - comment this out to get different rolls
print(random.randint(1,8))  # Normally produces random integer 1-8
print(random.randint(1,8))

data_train, data_test, label_train, label_test = train_test_split(faces.data, faces.target,
                                                                    test_size=0.2, random_state=110)  # Set the seed
nbrs = KNeighborsClassifier(n_neighbors=3).fit(data_train, label_train)
nbrs.score(data_test,label_test)

from sklearn.model_selection import cross_val_score

cross_val_score(nbrs, data_train, label_train)

import numpy as np

for i in range(1,10):
    nbrs = KNeighborsClassifier(n_neighbors=i)
    print(np.mean(cross_val_score(nbrs, data_train, label_train)))

# Lecture23DecisionTrees.py

import math

yes_branch_entropy = 0
no_branch_entropy = -0.2 * math.log(0.2,2) - 0.8 * math.log(0.8, 2)
pr_yes = 5/2005
pr_no = 2000/2005
print(pr_yes * yes_branch_entropy + pr_no * no_branch_entropy)

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

iris = load_iris()

iris.feature_names

iris.target_names

iris.data[0]

features_train, features_test, labels_train, labels_test = \
    train_test_split(iris.data, iris.target, test_size=0.1, random_state=110)

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

dtree = DecisionTreeClassifier(criterion="entropy", random_state=110)
dtree.fit(features_train, labels_train)
dtree.score(features_test, labels_test)  # Gives accuracy

import matplotlib.pyplot as plt
from sklearn import tree

plt.figure(figsize=(14,10))
tree.plot_tree(dtree, feature_names = iris.feature_names, class_names = iris.target_names)

# Lecture24RandomForestsOnly.py

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import numpy as np

iris = load_iris()
iris["feature_names"]

features_train, features_test, labels_train, labels_test = \
    train_test_split(iris['data'], iris['target'], test_size=0.1,random_state=110)

irisforest = RandomForestClassifier(n_estimators=200,criterion="entropy",random_state=110)
irisforest.fit(features_train, labels_train)
irisforest.score(features_test, labels_test)

irisforest.feature_importances_

# Lecture25Regression.py
import numpy as np
# One x value per year from 1984 through 2016 (33 points), paired with one
# temperature reading per year in y.
x = np.linspace(1984, 2016, 33)
y = [48.0, 47.3, 47.2, 47.4, 47.2, 46.7, 49.7, 49.6, 46.4, 47.3,
     47.7, 47.8, 47.3, 47.4, 50.4, 49.8, 47.5, 49.1, 49.4, 47.1,
     47.6, 48.4, 50.1, 48.3, 48.6, 47.8, 50.4, 49.7, 51.4, 48.8,
     47.7, 48.5, 50.3]

import matplotlib.pyplot as plt
plt.plot(x, y, 'o')

import sklearn.linear_model as lm
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()
# fit() expects a 2-D feature matrix, so turn the years into a single column.
x = x.reshape(-1, 1)
linear_model.fit(x, y)
y_hat = linear_model.predict(x)
plt.plot(x, y, 'o')
plt.plot(x, y_hat, 'r')
print(f'The temperature is rising {linear_model.coef_[0]:.4f} degrees F per year')
print(f'{linear_model.intercept_:.2f}')
linear_model.score(x, y)

# Two-feature regression: 33 methane values and 27 mass_co values. The
# temperature and methane series are trimmed from index 6 onward so that all
# three series have 27 entries.
methane = np.array([12.81, 25.15, 38.06, 49.47, 60.24, 71.32, 80.08, 94.14,
                    96.49, 100.32, 107.54, 111.50, 113.97, 120.26, 132.39,
                    134.82, 133.30, 132.60, 135.91, 140.65, 135.76, 136.14,
                    138.11, 145.90, 152.41, 157.13, 162.33, 167.15, 172.17,
                    177.86, 190.62, 200.65, 207.73])
mass_co = [84, 82.7, 84.9, 81.7, 81.9, 79.2, 79.9, 85.9, 84.3, 81.9,
           82.9, 82.8, 83.7, 85, 83.6, 85, 77.1, 80.4, 77.2, 70.6,
           72.0, 68.1, 61.9, 65.7, 63.8, 65.6, 63.9]
y_from_90 = y[6:]  # From the last example, these are the temperatures
methane_from_90 = methane[6:]
# Stack the two series as rows, then transpose so each row is one sample
# with the columns (mass_co, methane).
x = np.transpose(np.array([mass_co, methane_from_90]))
x
temp_model = LinearRegression()
temp_model.fit(x, y_from_90)
print(temp_model.coef_)
print(temp_model.intercept_)

from sklearn.tree import DecisionTreeRegressor
import numpy as np
import matplotlib.pyplot as plt
model = DecisionTreeRegressor()  # no pruning of any kind, so expect overfitting
# x was overwritten by the two-feature matrix above, so rebuild the year
# column (and the matching temperatures) before fitting the tree.
x = np.linspace(1984, 2016, 33)
x = x.reshape(-1, 1)
y = [48.0, 47.3, 47.2, 47.4, 47.2, 46.7, 49.7, 49.6, 46.4, 47.3,
     47.7, 47.8, 47.3, 47.4, 50.4, 49.8, 47.5, 49.1, 49.4, 47.1,
     47.6, 48.4, 50.1, 48.3, 48.6, 47.8, 50.4, 49.7, 51.4, 48.8,
     47.7, 48.5, 50.3]
# Train on the first 30 points only; the final 3 are never seen by fit().
xtrain = x[:30]
ytrain = y[:30]
model.fit(xtrain, ytrain)
yhat = model.predict(x)
plt.plot(x, y, 'o')
plt.plot(x[:30], yhat[:30])
# Start at index 29 so the red segment joins the end of the training line.
plt.plot(x[29:], yhat[29:], 'r')  # Plot line to
# test predictions in red

model = DecisionTreeRegressor(max_depth=3)  # maybe overdoing it on the pruning
x = np.linspace(1984, 2016, 33)
prev_value_features = [0] + y.copy()[:-1]  # shift y values so we see the previous one; discard last
# One row per year with the columns (year, previous year's value).
combined_features = np.array([x, prev_value_features]).transpose()
print(combined_features)
xtrain = combined_features[:30, :]
model.fit(xtrain, ytrain)
yhat = model.predict(combined_features)
plt.plot(x, y, 'o')
plt.plot(x[:30], yhat[:30])
plt.plot(x[29:], yhat[29:], 'r')

from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
model.fit(xtrain, ytrain)  # xtrain has the matrix we made in the previous code box
yhat = model.predict(combined_features)
plt.plot(x, y, 'o')
plt.plot(x[:30], yhat[:30])
plt.plot(x[29:], yhat[29:], 'r')

from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor(n_neighbors=3)
model.fit(xtrain, ytrain)  # xtrain has the matrix we made in the previous code box
yhat = model.predict(combined_features)
plt.plot(x, y, 'o')
plt.plot(x[:30], yhat[:30])
plt.plot(x[29:], yhat[29:], 'r')


# Lecture26ModernNLPandML.py

import pandas as pd
SST2_LOC = 'https://github.com/clairett/pytorch-sentiment-classification/raw/master/data/SST2/train.tsv'
# Tab-separated file with no header row, so the columns are numbered 0 and 1;
# below, column 0 is used as the text and column 1 as the label.
df = pd.read_csv(SST2_LOC, delimiter='\t', header=None)
df

import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt')  # Name means 'period' in German; from Kiss and Strunk 2006
# Recent NLTK releases load the tokenizer data from the 'punkt_tab' resource
# rather than 'punkt', so fetch that as well.
# NOTE(review): confirm against the NLTK version used in class.
nltk.download('punkt_tab')
word_tokenize("I won't sell my cat for even $1,000,000,000.")


def wordset(raw_text):
    """Return the set of distinct tokens in the lowercased raw_text."""
    return set(word_tokenize(raw_text.lower()))


def all_words_set(df_column):
    """Tokenize every text in df_column.

    Returns a tuple (set_of_all, dict_of_all): the union of every row's
    token set, and a dict mapping each row's text to its own token set.
    """
    set_of_all = set()
    dict_of_all = {}
    for row in df_column:
        textset = wordset(row)
        # Grow the vocabulary in place; building a fresh set with union()
        # on every row would copy the whole vocabulary each time.
        set_of_all |= textset
        dict_of_all[row] = textset
    return set_of_all, dict_of_all


def one_hot_columns(df_column):
    """Build a one-hot word-presence DataFrame for a column of texts.

    The result has one column per distinct word and one row per entry of
    df_column; a cell is 1 when the word occurs in that row's text, else 0.
    """
    all_words, all_tokenizations = all_words_set(df_column)
    # Look up each row's token set once, in row order.  Iterating the values
    # directly (instead of df_column[line_num]) also works for a Series whose
    # index is not the default 0..n-1.
    row_token_sets = [all_tokenizations[text] for text in df_column]
    word_dict = {
        word: [int(word in tokens) for tokens in row_token_sets]
        for word in all_words
    }
    # We can create a dataframe from a dictionary of column header
    # to list of column values
    return pd.DataFrame.from_dict(word_dict)


one_hot_cols = one_hot_columns(df.iloc[:, 0])
one_hot_cols

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
labels = df[1]
features = one_hot_cols
X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                    random_state=42)
clf = RandomForestClassifier(n_estimators=200, random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
one_hot_cols.sum()

import gensim.downloader as api
wv = api.load('word2vec-google-news-300')
wv['king']
print(wv.most_similar('king'))  # Prints words and cosines of angles with 'king'

import numpy as np


def find_cosine(vec1, vec2):
    """Return the cosine of the angle between vec1 and vec2."""
    # Scale vectors to both have unit length
    unit_vec1 = vec1/np.linalg.norm(vec1)
    unit_vec2 = vec2/np.linalg.norm(vec2)
    # The dot product of unit vectors gives the cosine of their angle
    return np.dot(unit_vec1, unit_vec2)


print(find_cosine(wv['king'], wv['faucet']))
wv.similarity('king', 'faucet')


def find_avg_vector(txt, embedding):
    """Average the embedding vectors of the tokens of txt.

    Tokens that are not in embedding are skipped.  Returns a pandas Series
    holding the mean vector, or an all-zero Series when no token of txt is
    found in embedding.
    """
    words = word_tokenize(txt)
    vec_sum = None
    count = 0
    for word in words:
        if word in embedding:
            count += 1
            if vec_sum is not None:
                vec_sum += embedding[word]
            else:
                # The embeddings are read-only unless you copy them
                vec_sum = embedding[word].copy()
    if vec_sum is None:
        # Treat no word found in embedding as zero vector.  Use the
        # embedding's own width when it exposes one, falling back to the
        # 300 dimensions this lecture's word2vec model uses.
        dim = getattr(embedding, 'vector_size', 300)
        return pd.Series(np.zeros((dim,)))
    return pd.Series(vec_sum/count)


find_avg_vector('Long live the king and queen!', wv)
# apply() with a function that returns a Series yields one column per vector
# component; rename those columns 'feature0', 'feature1', ...
df_embeddings = df[0].apply(lambda txt: find_avg_vector(txt, wv))
df_embeddings.rename(columns=lambda x: 'feature'+str(x), inplace=True)
df_augmented = pd.concat([df, df_embeddings], axis=1)
df_augmented

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
labels = df_augmented[1]
# Skip the first two columns (text and label); the rest are the embedding
# features added above.
features = df_augmented.iloc[:, 2:]
X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                    random_state=42)
clf = RandomForestClassifier(n_estimators=200, random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)