Spaces:
Sleeping
Sleeping
File size: 1,338 Bytes
58b9d07 9b6ba86 58b9d07 9c5b315 58b9d07 9c5b315 58b9d07 9c5b315 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | from pathlib import Path
import whisper
# Process-wide cache for the loaded Whisper model; filled on first request.
_model = None


def _get_model():
    """Return the cached Whisper "base" model, loading it on first use."""
    global _model
    if _model is not None:
        return _model
    _model = whisper.load_model("base")
    return _model
def transcribe_audio(file_path: Path) -> str:
    """Transcribe an audio file (e.g. an mp3) to text.

    The model is obtained from ``_get_model()`` and the file path is passed
    to its ``transcribe`` method as a string.

    Args:
        file_path: Location of the audio file to transcribe.

    Returns:
        The transcript with surrounding whitespace stripped, or an empty
        string if anything goes wrong. Failures are logged with a traceback
        instead of being raised, so callers keep the best-effort contract.
    """
    # Function-scope import so this block does not depend on a file-level
    # change to bring ``logging`` into scope.
    import logging

    try:
        model = _get_model()
        result = model.transcribe(str(file_path))
        text = result.get("text", "")
        # Defensive: if "text" comes back as a list, join its items.
        if isinstance(text, list):
            text = " ".join(str(part) for part in text)
        return str(text).strip()
    except Exception:
        # Deliberately broad: this is a best-effort boundary. Record the
        # failure so it is distinguishable from genuinely silent audio.
        logging.getLogger(__name__).exception(
            "Transcription failed for %s", file_path
        )
        return ""
def extract_pie_ingredients(text: str) -> str:
    """Return the known pie ingredients that are mentioned in *text*.

    Matching is a case-insensitive substring search against a fixed list of
    ingredient phrases.

    Args:
        text: Free text to scan for ingredient phrases.

    Returns:
        The matched phrases (lower-case, as listed below), sorted
        alphabetically and joined with commas without spaces. An empty
        string is returned when nothing matches.
    """
    ingredients = (
        "ripe strawberries",
        "granulated sugar",
        "freshly squeezed lemon juice",
        "cornstarch",
        "pure vanilla extract",
    )
    # Lower-case the input once instead of once per candidate phrase.
    lowered = text.lower()
    found = [phrase for phrase in ingredients if phrase in lowered]
    return ",".join(sorted(found))
import re
# Singular form: "page 245".
_SINGLE_PAGE_RE = re.compile(r"\bpage\s+(\d+)\b")
# Plural form: "pages 132, 133 and 134". Only numbers joined by commas and/or
# the literal word "and" belong to the list; a character class such as
# [0-9,\sand] would also run through unrelated words made of a/n/d.
_PAGE_LIST_RE = re.compile(
    r"\bpages\s+(\d+(?:(?:\s*,\s*(?:and\s+)?|\s+and\s+)\d+)*)"
)


def extract_page_numbers(text: str) -> str:
    """Collect the page numbers mentioned in *text*.

    Recognises the singular form ("page 245") and the plural list form
    ("pages 132, 133 and 134"), case-insensitively.

    Args:
        text: Free text to scan for page references.

    Returns:
        The distinct page numbers in ascending numeric order, joined with
        commas without spaces. An empty string is returned when no page
        reference is found.
    """
    lowered = text.lower()
    pages = {int(num) for num in _SINGLE_PAGE_RE.findall(lowered)}
    for block in _PAGE_LIST_RE.findall(lowered):
        pages.update(int(num) for num in re.findall(r"\d+", block))
    return ",".join(str(num) for num in sorted(pages))
|