Spaces:

Adityax-07
/

CodeSage

Sleeping

CodeSage / data /docs /string_algorithms.txt

Aditya

Add LLM vs RAG vs Fine-Tuning comparison project

4a3f117 3 months ago

5.37 kB

	STRING ALGORITHMS
	=================
	Strings are sequences of characters. Most string operations in Python run in O(n).

	--- Basic String Operations ---
	s = "hello world"
	print(len(s)) # 11
	print(s.upper()) # HELLO WORLD
	print(s.lower()) # hello world
	print(s.split()) # ['hello', 'world']
	print(s.replace("world", "Python")) # hello Python
	print(s.startswith("hello")) # True
	print(s.endswith("world")) # True
	print(s.find("world")) # 6 (index)
	print(s.count("l")) # 3
	print(s[::-1]) # dlrow olleh (reverse)
	print("".join(reversed(s))) # same as above

	--- Reverse Words in a Sentence ---
	def reverse_words(sentence):
	return ' '.join(sentence.split()[::-1])

	print(reverse_words("Hello World Python")) # Python World Hello

	--- Check Palindrome ---
	def is_palindrome(s):
	s = ''.join(c.lower() for c in s if c.isalnum())
	return s == s[::-1]

	print(is_palindrome("A man a plan a canal Panama")) # True
	print(is_palindrome("race a car")) # False

	--- Anagram Check ---
	Two strings are anagrams if they have same characters with same frequency.

	from collections import Counter

	def is_anagram(s, t):
	return Counter(s) == Counter(t)

	print(is_anagram("listen", "silent")) # True
	print(is_anagram("hello", "world")) # False

	--- Count Character Frequency ---
	def char_frequency(s):
	freq = {}
	for ch in s:
	freq[ch] = freq.get(ch, 0) + 1
	return freq

	print(char_frequency("abracadabra"))
	# {'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1}

	--- KMP String Matching ---
	Find all occurrences of pattern in text.
	Time: O(n + m). Space: O(m). Efficient, no unnecessary comparisons.

	def kmp_search(text, pattern):
	n, m = len(text), len(pattern)
	if m == 0:
	return [0]

	# Build failure function
	fail = [0] * m
	j = 0
	for i in range(1, m):
	while j > 0 and pattern[i] != pattern[j]:
	j = fail[j - 1]
	if pattern[i] == pattern[j]:
	j += 1
	fail[i] = j

	# Search
	matches = []
	j = 0
	for i in range(n):
	while j > 0 and text[i] != pattern[j]:
	j = fail[j - 1]
	if text[i] == pattern[j]:
	j += 1
	if j == m:
	matches.append(i - m + 1)
	j = fail[j - 1]
	return matches

	print(kmp_search("aabaacaadaabaaba", "aaba")) # [0, 9, 12]

	--- Rabin-Karp Rolling Hash ---
	Find pattern in text using hashing. Avg O(n+m), worst O(nm).

	def rabin_karp(text, pattern):
	n, m = len(text), len(pattern)
	BASE, MOD = 256, 101
	pat_hash = text_hash = 0
	h = pow(BASE, m - 1, MOD)
	matches = []

	for i in range(m):
	pat_hash = (BASE * pat_hash + ord(pattern[i])) % MOD
	text_hash = (BASE * text_hash + ord(text[i])) % MOD

	for i in range(n - m + 1):
	if pat_hash == text_hash:
	if text[i:i+m] == pattern: # verify
	matches.append(i)
	if i < n - m:
	text_hash = (BASE(text_hash - ord(text[i])h) + ord(text[i+m])) % MOD
	if text_hash < 0:
	text_hash += MOD
	return matches

	print(rabin_karp("GEEKS FOR GEEKS", "GEEKS")) # [0, 10]

	--- Longest Common Prefix ---
	def longest_common_prefix(strs):
	if not strs:
	return ""
	prefix = strs[0]
	for s in strs[1:]:
	while not s.startswith(prefix):
	prefix = prefix[:-1]
	if not prefix:
	return ""
	return prefix

	print(longest_common_prefix(["flower","flow","flight"])) # "fl"
	print(longest_common_prefix(["dog","racecar","car"])) # ""

	--- Valid Parentheses ---
	def is_valid_parentheses(s):
	stack = []
	pairs = {')':'(', '}':'{', ']':'['}
	for ch in s:
	if ch in '({[':
	stack.append(ch)
	elif ch in ')}]':
	if not stack or stack[-1] != pairs[ch]:
	return False
	stack.pop()
	return not stack

	print(is_valid_parentheses("()[]{}")) # True
	print(is_valid_parentheses("([)]")) # False

	--- Roman to Integer ---
	def roman_to_int(s):
	values = {'I':1,'V':5,'X':10,'L':50,'C':100,'D':500,'M':1000}
	result = 0
	for i in range(len(s)):
	if i + 1 < len(s) and values[s[i]] < values[s[i+1]]:
	result -= values[s[i]]
	else:
	result += values[s[i]]
	return result

	print(roman_to_int("III")) # 3
	print(roman_to_int("LVIII")) # 58
	print(roman_to_int("MCMXCIV"))# 1994

	--- String Compression ---
	Compress "aabcccdddd" to "a2b1c3d4".

	def compress(s):
	result = []
	i = 0
	while i < len(s):
	ch = s[i]
	count = 0
	while i < len(s) and s[i] == ch:
	count += 1
	i += 1
	result.append(ch + str(count))
	compressed = ''.join(result)
	return compressed if len(compressed) < len(s) else s

	print(compress("aabcccdddd")) # a2b1c3d4
	print(compress("abcd")) # abcd (no gain)

	--- Zigzag String Conversion ---
	Convert string to zigzag pattern across rows.

	def convert_zigzag(s, numRows):
	if numRows == 1 or numRows >= len(s):
	return s
	rows = [''] * numRows
	row, step = 0, 1
	for ch in s:
	rows[row] += ch
	if row == 0: step = 1
	if row == numRows - 1: step = -1
	row += step
	return ''.join(rows)

	print(convert_zigzag("PAYPALISHIRING", 3)) # PAHNAPLSIIGYIR