STRING ALGORITHMS ================= Strings are sequences of characters. Most string operations in Python run in O(n). --- Basic String Operations --- s = "hello world" print(len(s)) # 11 print(s.upper()) # HELLO WORLD print(s.lower()) # hello world print(s.split()) # ['hello', 'world'] print(s.replace("world", "Python")) # hello Python print(s.startswith("hello")) # True print(s.endswith("world")) # True print(s.find("world")) # 6 (index) print(s.count("l")) # 3 print(s[::-1]) # dlrow olleh (reverse) print("".join(reversed(s))) # same as above --- Reverse Words in a Sentence --- def reverse_words(sentence): return ' '.join(sentence.split()[::-1]) print(reverse_words("Hello World Python")) # Python World Hello --- Check Palindrome --- def is_palindrome(s): s = ''.join(c.lower() for c in s if c.isalnum()) return s == s[::-1] print(is_palindrome("A man a plan a canal Panama")) # True print(is_palindrome("race a car")) # False --- Anagram Check --- Two strings are anagrams if they have same characters with same frequency. from collections import Counter def is_anagram(s, t): return Counter(s) == Counter(t) print(is_anagram("listen", "silent")) # True print(is_anagram("hello", "world")) # False --- Count Character Frequency --- def char_frequency(s): freq = {} for ch in s: freq[ch] = freq.get(ch, 0) + 1 return freq print(char_frequency("abracadabra")) # {'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1} --- KMP String Matching --- Find all occurrences of pattern in text. Time: O(n + m). Space: O(m). Efficient, no unnecessary comparisons. def kmp_search(text, pattern): n, m = len(text), len(pattern) if m == 0: return [0] # Build failure function fail = [0] * m j = 0 for i in range(1, m): while j > 0 and pattern[i] != pattern[j]: j = fail[j - 1] if pattern[i] == pattern[j]: j += 1 fail[i] = j # Search matches = [] j = 0 for i in range(n): while j > 0 and text[i] != pattern[j]: j = fail[j - 1] if text[i] == pattern[j]: j += 1 if j == m: matches.append(i - m + 1) j = fail[j - 1] return matches print(kmp_search("aabaacaadaabaaba", "aaba")) # [0, 9, 12] --- Rabin-Karp Rolling Hash --- Find pattern in text using hashing. Avg O(n+m), worst O(nm). def rabin_karp(text, pattern): n, m = len(text), len(pattern) BASE, MOD = 256, 101 pat_hash = text_hash = 0 h = pow(BASE, m - 1, MOD) matches = [] for i in range(m): pat_hash = (BASE * pat_hash + ord(pattern[i])) % MOD text_hash = (BASE * text_hash + ord(text[i])) % MOD for i in range(n - m + 1): if pat_hash == text_hash: if text[i:i+m] == pattern: # verify matches.append(i) if i < n - m: text_hash = (BASE*(text_hash - ord(text[i])*h) + ord(text[i+m])) % MOD if text_hash < 0: text_hash += MOD return matches print(rabin_karp("GEEKS FOR GEEKS", "GEEKS")) # [0, 10] --- Longest Common Prefix --- def longest_common_prefix(strs): if not strs: return "" prefix = strs[0] for s in strs[1:]: while not s.startswith(prefix): prefix = prefix[:-1] if not prefix: return "" return prefix print(longest_common_prefix(["flower","flow","flight"])) # "fl" print(longest_common_prefix(["dog","racecar","car"])) # "" --- Valid Parentheses --- def is_valid_parentheses(s): stack = [] pairs = {')':'(', '}':'{', ']':'['} for ch in s: if ch in '({[': stack.append(ch) elif ch in ')}]': if not stack or stack[-1] != pairs[ch]: return False stack.pop() return not stack print(is_valid_parentheses("()[]{}")) # True print(is_valid_parentheses("([)]")) # False --- Roman to Integer --- def roman_to_int(s): values = {'I':1,'V':5,'X':10,'L':50,'C':100,'D':500,'M':1000} result = 0 for i in range(len(s)): if i + 1 < len(s) and values[s[i]] < values[s[i+1]]: result -= values[s[i]] else: result += values[s[i]] return result print(roman_to_int("III")) # 3 print(roman_to_int("LVIII")) # 58 print(roman_to_int("MCMXCIV"))# 1994 --- String Compression --- Compress "aabcccdddd" to "a2b1c3d4". def compress(s): result = [] i = 0 while i < len(s): ch = s[i] count = 0 while i < len(s) and s[i] == ch: count += 1 i += 1 result.append(ch + str(count)) compressed = ''.join(result) return compressed if len(compressed) < len(s) else s print(compress("aabcccdddd")) # a2b1c3d4 print(compress("abcd")) # abcd (no gain) --- Zigzag String Conversion --- Convert string to zigzag pattern across rows. def convert_zigzag(s, numRows): if numRows == 1 or numRows >= len(s): return s rows = [''] * numRows row, step = 0, 1 for ch in s: rows[row] += ch if row == 0: step = 1 if row == numRows - 1: step = -1 row += step return ''.join(rows) print(convert_zigzag("PAYPALISHIRING", 3)) # PAHNAPLSIIGYIR