lynn-twinkl committed on
Commit
5375208
·
1 Parent(s): 340a869

Finds applications that would be good candidates for Twinkl Originals

Browse files
Files changed (1) hide show
  1. src/twinkl_originals.py +20 -0
src/twinkl_originals.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pandas as pd
3
+
4
def find_book_candidates(df: pd.DataFrame, column: str) -> pd.Series:
    """Flag rows whose text mentions books together with a primary-level keyword.

    A row is flagged when the text in ``column`` contains the whole word
    "book" or "books" AND at least one of the whole-word keywords
    "ks1", "ks2", "primary", "eyfs", "early years" or "nursery".
    Matching is case-insensitive.

    Args:
        df: DataFrame holding the text to scan.
        column: Name of the column in ``df`` whose values are searched.

    Returns:
        A boolean Series aligned with ``df``'s index; ``True`` marks rows
        that satisfy both conditions.

    Raises:
        KeyError: If ``column`` is not one of ``df``'s columns.
    """
    if column not in df.columns:
        raise KeyError(f"Column '{column}' not found in DataFrame")

    # Stringify first so non-text values can be searched without raising;
    # ``na=False`` below makes any value still treated as missing count as
    # a non-match.
    text = df[column].astype(str)

    pattern_books = r'\bbooks?\b'
    # Non-capturing group: a capturing group here makes ``str.contains``
    # emit a UserWarning about match groups on every call, even though the
    # groups are never extracted. ``(?:...)`` matches exactly the same text.
    pattern_level = r'\b(?:ks1|ks2|primary|eyfs|early years|nursery)\b'

    has_books = text.str.contains(pattern_books, case=False, na=False)
    is_primary = text.str.contains(pattern_level, case=False, na=False)

    return has_books & is_primary
19
+
20
+