CodeSage / data /docs /hashing.txt
Aditya
Add LLM vs RAG vs Fine-Tuning comparison project
4a3f117
Raw
History Blame Contribute Delete
4.53 kB
HASHING AND HASH TABLES
=======================
A hash table stores key-value pairs. Uses a hash function to map keys to indices.
Average time: O(1) for insert, delete, search. Worst case: O(n) with collisions.
Python's dict and set are hash tables.
--- Hash Table Basics ---
# Python dict = hash map
phone_book = {}
phone_book["Alice"] = "555-1234"
phone_book["Bob"] = "555-5678"
print(phone_book["Alice"]) # "555-1234"
print("Alice" in phone_book) # True
del phone_book["Bob"]
print(phone_book.get("Bob", "N/A")) # "N/A"
# Iterate
for name, number in phone_book.items():
print(name, number)
--- Build a Hash Map from Scratch ---
class HashMap:
def __init__(self, size=1000):
self.size = size
self.buckets = [[] for _ in range(size)]
def _hash(self, key):
return hash(key) % self.size
def put(self, key, value):
idx = self._hash(key)
for i, (k, v) in enumerate(self.buckets[idx]):
if k == key:
self.buckets[idx][i] = (key, value)
return
self.buckets[idx].append((key, value))
def get(self, key):
idx = self._hash(key)
for k, v in self.buckets[idx]:
if k == key:
return v
return None
def remove(self, key):
idx = self._hash(key)
self.buckets[idx] = [(k, v) for k, v in self.buckets[idx] if k != key]
hm = HashMap()
hm.put("name", "Alice")
hm.put("age", 25)
print(hm.get("name")) # Alice
hm.remove("age")
print(hm.get("age")) # None
--- Two Sum Problem ---
Find two numbers that add up to target. Classic hash map problem.
Time: O(n). Space: O(n).
def two_sum(nums, target):
seen = {} # value → index
for i, num in enumerate(nums):
complement = target - num
if complement in seen:
return [seen[complement], i]
seen[num] = i
return []
print(two_sum([2, 7, 11, 15], 9)) # [0, 1]
print(two_sum([3, 2, 4], 6)) # [1, 2]
--- Group Anagrams ---
Group words that are anagrams of each other.
Time: O(n * k log k) where k = max word length.
from collections import defaultdict
def group_anagrams(words):
groups = defaultdict(list)
for word in words:
key = ''.join(sorted(word)) # sorted chars as key
groups[key].append(word)
return list(groups.values())
print(group_anagrams(["eat","tea","tan","ate","nat","bat"]))
# [["eat","tea","ate"], ["tan","nat"], ["bat"]]
--- Frequency Counter ---
Count frequency of each element.
from collections import Counter
def top_k_frequent(nums, k):
count = Counter(nums)
return [x for x, _ in count.most_common(k)]
print(top_k_frequent([1,1,1,2,2,3], 2)) # [1, 2]
--- Longest Consecutive Sequence ---
Find length of longest consecutive elements sequence.
Time: O(n). Space: O(n).
def longest_consecutive(nums):
num_set = set(nums)
best = 0
for num in num_set:
if num - 1 not in num_set: # start of sequence
length = 1
while num + length in num_set:
length += 1
best = max(best, length)
return best
print(longest_consecutive([100,4,200,1,3,2])) # 4 (1,2,3,4)
--- Subarray Sum Equals K ---
Count subarrays with sum equal to k. Uses prefix sum + hash map.
Time: O(n). Space: O(n).
def subarray_sum(nums, k):
count = 0
prefix = 0
freq = {0: 1} # prefix_sum → frequency
for num in nums:
prefix += num
if prefix - k in freq:
count += freq[prefix - k]
freq[prefix] = freq.get(prefix, 0) + 1
return count
print(subarray_sum([1, 1, 1], 2)) # 2
print(subarray_sum([1, 2, 3], 3)) # 2
--- Check if Two Strings are Anagram ---
def is_anagram(s, t):
return Counter(s) == Counter(t)
print(is_anagram("anagram", "nagaram")) # True
print(is_anagram("rat", "car")) # False
--- First Non-Repeating Character ---
def first_unique(s):
count = Counter(s)
for i, ch in enumerate(s):
if count[ch] == 1:
return i
return -1
print(first_unique("leetcode")) # 0 (l)
print(first_unique("aabb")) # -1
--- Hash Set Uses ---
# O(1) membership test
visited = set()
visited.add(5)
print(5 in visited) # True
print(10 in visited) # False
# Remove duplicates
arr = [1, 2, 2, 3, 3, 4]
unique = list(set(arr))
print(unique) # [1, 2, 3, 4]
# Intersection, Union
a = {1, 2, 3, 4}
b = {3, 4, 5, 6}
print(a & b) # {3, 4} intersection
print(a | b) # {1,2,3,4,5,6} union
print(a - b) # {1, 2} difference