CodeSage / data /docs /string_algorithms.txt
Aditya
Add LLM vs RAG vs Fine-Tuning comparison project
4a3f117
Raw
History Blame Contribute Delete
5.37 kB
STRING ALGORITHMS
=================
Strings are sequences of characters. Most string operations in Python run in O(n).
--- Basic String Operations ---
s = "hello world"
print(len(s)) # 11
print(s.upper()) # HELLO WORLD
print(s.lower()) # hello world
print(s.split()) # ['hello', 'world']
print(s.replace("world", "Python")) # hello Python
print(s.startswith("hello")) # True
print(s.endswith("world")) # True
print(s.find("world")) # 6 (index)
print(s.count("l")) # 3
print(s[::-1]) # dlrow olleh (reverse)
print("".join(reversed(s))) # same as above
--- Reverse Words in a Sentence ---
def reverse_words(sentence):
return ' '.join(sentence.split()[::-1])
print(reverse_words("Hello World Python")) # Python World Hello
--- Check Palindrome ---
def is_palindrome(s):
s = ''.join(c.lower() for c in s if c.isalnum())
return s == s[::-1]
print(is_palindrome("A man a plan a canal Panama")) # True
print(is_palindrome("race a car")) # False
--- Anagram Check ---
Two strings are anagrams if they have same characters with same frequency.
from collections import Counter
def is_anagram(s, t):
return Counter(s) == Counter(t)
print(is_anagram("listen", "silent")) # True
print(is_anagram("hello", "world")) # False
--- Count Character Frequency ---
def char_frequency(s):
freq = {}
for ch in s:
freq[ch] = freq.get(ch, 0) + 1
return freq
print(char_frequency("abracadabra"))
# {'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1}
--- KMP String Matching ---
Find all occurrences of pattern in text.
Time: O(n + m). Space: O(m). Efficient, no unnecessary comparisons.
def kmp_search(text, pattern):
n, m = len(text), len(pattern)
if m == 0:
return [0]
# Build failure function
fail = [0] * m
j = 0
for i in range(1, m):
while j > 0 and pattern[i] != pattern[j]:
j = fail[j - 1]
if pattern[i] == pattern[j]:
j += 1
fail[i] = j
# Search
matches = []
j = 0
for i in range(n):
while j > 0 and text[i] != pattern[j]:
j = fail[j - 1]
if text[i] == pattern[j]:
j += 1
if j == m:
matches.append(i - m + 1)
j = fail[j - 1]
return matches
print(kmp_search("aabaacaadaabaaba", "aaba")) # [0, 9, 12]
--- Rabin-Karp Rolling Hash ---
Find pattern in text using hashing. Avg O(n+m), worst O(nm).
def rabin_karp(text, pattern):
n, m = len(text), len(pattern)
BASE, MOD = 256, 101
pat_hash = text_hash = 0
h = pow(BASE, m - 1, MOD)
matches = []
for i in range(m):
pat_hash = (BASE * pat_hash + ord(pattern[i])) % MOD
text_hash = (BASE * text_hash + ord(text[i])) % MOD
for i in range(n - m + 1):
if pat_hash == text_hash:
if text[i:i+m] == pattern: # verify
matches.append(i)
if i < n - m:
text_hash = (BASE*(text_hash - ord(text[i])*h) + ord(text[i+m])) % MOD
if text_hash < 0:
text_hash += MOD
return matches
print(rabin_karp("GEEKS FOR GEEKS", "GEEKS")) # [0, 10]
--- Longest Common Prefix ---
def longest_common_prefix(strs):
if not strs:
return ""
prefix = strs[0]
for s in strs[1:]:
while not s.startswith(prefix):
prefix = prefix[:-1]
if not prefix:
return ""
return prefix
print(longest_common_prefix(["flower","flow","flight"])) # "fl"
print(longest_common_prefix(["dog","racecar","car"])) # ""
--- Valid Parentheses ---
def is_valid_parentheses(s):
stack = []
pairs = {')':'(', '}':'{', ']':'['}
for ch in s:
if ch in '({[':
stack.append(ch)
elif ch in ')}]':
if not stack or stack[-1] != pairs[ch]:
return False
stack.pop()
return not stack
print(is_valid_parentheses("()[]{}")) # True
print(is_valid_parentheses("([)]")) # False
--- Roman to Integer ---
def roman_to_int(s):
values = {'I':1,'V':5,'X':10,'L':50,'C':100,'D':500,'M':1000}
result = 0
for i in range(len(s)):
if i + 1 < len(s) and values[s[i]] < values[s[i+1]]:
result -= values[s[i]]
else:
result += values[s[i]]
return result
print(roman_to_int("III")) # 3
print(roman_to_int("LVIII")) # 58
print(roman_to_int("MCMXCIV"))# 1994
--- String Compression ---
Compress "aabcccdddd" to "a2b1c3d4".
def compress(s):
result = []
i = 0
while i < len(s):
ch = s[i]
count = 0
while i < len(s) and s[i] == ch:
count += 1
i += 1
result.append(ch + str(count))
compressed = ''.join(result)
return compressed if len(compressed) < len(s) else s
print(compress("aabcccdddd")) # a2b1c3d4
print(compress("abcd")) # abcd (no gain)
--- Zigzag String Conversion ---
Convert string to zigzag pattern across rows.
def convert_zigzag(s, numRows):
if numRows == 1 or numRows >= len(s):
return s
rows = [''] * numRows
row, step = 0, 1
for ch in s:
rows[row] += ch
if row == 0: step = 1
if row == numRows - 1: step = -1
row += step
return ''.join(rows)
print(convert_zigzag("PAYPALISHIRING", 3)) # PAHNAPLSIIGYIR