File size: 1,338 Bytes
58b9d07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b6ba86
 
 
 
 
58b9d07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c5b315
58b9d07
9c5b315
58b9d07
9c5b315
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import logging
from pathlib import Path

import whisper


# Process-wide cache for the loaded Whisper model; stays None until first use.
_model = None


def _get_model():
    """Return the cached Whisper "base" model, loading it on the first call."""
    global _model
    if _model is not None:
        return _model

    # First request: load once and remember the instance for later calls.
    _model = whisper.load_model("base")
    return _model


def transcribe_audio(file_path: Path) -> str:
    """
    Transcribe an audio file (e.g. an mp3) to text.

    This is a best-effort helper: any failure while obtaining the model,
    running the transcription or reading its result is logged with a
    traceback and reported to the caller as an empty string rather than
    an exception.

    Args:
        file_path: Location of the audio file; handed to the model as a
            plain string path.

    Returns:
        The transcribed text with surrounding whitespace stripped, or ``""``
        when the result carries no text or the transcription failed.
    """
    try:
        model = _get_model()
        result = model.transcribe(str(file_path))
        text = result.get("text", "")
        # Defensive: if the text comes back as a list of pieces, join them.
        if isinstance(text, list):
            text = " ".join(str(piece) for piece in text)

        return str(text).strip()
    except Exception:
        # Keep the "never raise" contract, but leave a trace so an empty
        # transcript caused by an error can be told apart from silence.
        logging.getLogger(__name__).exception(
            "Failed to transcribe audio file %s", file_path
        )
        return ""

def extract_pie_ingredients(text: str) -> str:
    """
    Return the known pie ingredients that are mentioned in ``text``.

    Matching is a case-insensitive substring search against a fixed list of
    ingredient phrases.

    Args:
        text: Free-form text (for example a transcript) to search.

    Returns:
        The matched ingredient phrases in alphabetical order, joined with
        commas and no spaces; ``""`` when none of them occur.
    """
    ingredients = (
        "ripe strawberries",
        "granulated sugar",
        "freshly squeezed lemon juice",
        "cornstarch",
        "pure vanilla extract",
    )

    # Lower-case the input once instead of once per ingredient.
    haystack = text.lower()
    found = [item for item in ingredients if item in haystack]

    return ",".join(sorted(found))

import re


def extract_page_numbers(text: str) -> str:
    """
    Collect the page numbers mentioned in ``text``.

    Two forms are recognised, case-insensitively:

    * ``page N`` - a single number after the word "page";
    * ``pages N, M and K`` - a list of numbers after the word "pages",
      separated by commas, whitespace and/or the word "and".

    Args:
        text: Free-form text (for example a transcript) to scan.

    Returns:
        The distinct page numbers in ascending numeric order, joined with
        commas and no spaces; ``""`` when no page reference is found.
    """
    lowered = text.lower()

    # Singular form: "page 245".
    numbers = {int(n) for n in re.findall(r"\bpage\s+(\d+)\b", lowered)}

    # Plural form: "pages 132, 133 and 134". The list is matched structurally
    # (number, separator, number, ...) so that it ends at the first word that
    # is not the separator "and"; a loose character class of digits, commas,
    # spaces and the letters a/n/d would run on through words such as "add"
    # and pick up unrelated numbers.
    list_pattern = (
        r"\bpages\s+(\d+(?:(?:\s*,\s*(?:and\s+)?|\s+(?:and\s+)?)\d+)*)"
    )
    for block in re.findall(list_pattern, lowered):
        numbers.update(int(n) for n in re.findall(r"\d+", block))

    return ",".join(str(n) for n in sorted(numbers))