Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| import whisper | |
# Process-wide cache for the Whisper model; stays None until first requested.
_model = None


def _get_model():
    """Return the shared Whisper "base" model, loading it on the first call."""
    global _model
    if _model is not None:
        return _model
    # First use: load once and keep the instance for every later caller.
    _model = whisper.load_model("base")
    return _model
def transcribe_audio(file_path: Path) -> str:
    """
    Transcribe an audio file (e.g. mp3) to text.

    This is a best-effort helper: any failure while loading the model or
    transcribing is logged with its traceback and reported to the caller
    as an empty string rather than raised.

    Args:
        file_path: Location of the audio file; passed to the model as a
            plain string.

    Returns:
        The transcribed text with surrounding whitespace stripped, or ""
        when transcription fails or produces no text.
    """
    try:
        model = _get_model()
        result = model.transcribe(str(file_path))
        text = result.get("text", "")
        # Defensive: tolerate a list of text pieces by joining them.
        if isinstance(text, list):
            text = " ".join(str(t) for t in text)
        return str(text).strip()
    except Exception:
        # Deliberate best-effort boundary: callers rely on "" instead of an
        # exception, but the failure must not vanish silently, so record it.
        import logging

        logging.getLogger(__name__).exception(
            "Transcription failed for %s", file_path
        )
        return ""
def extract_pie_ingredients(text: str) -> str:
    """
    Report which of a fixed set of pie ingredients are mentioned in *text*.

    Matching is a case-insensitive substring search for each exact
    ingredient phrase.

    Args:
        text: Free text (for example a transcript) to search.

    Returns:
        The matched ingredient phrases, sorted alphabetically and joined
        with "," (no spaces); "" when none are present.
    """
    known_ingredients = (
        "ripe strawberries",
        "granulated sugar",
        "freshly squeezed lemon juice",
        "cornstarch",
        "pure vanilla extract",
    )
    # Lowercase once up front instead of once per candidate phrase.
    haystack = text.lower()
    found = [item for item in known_ingredients if item in haystack]
    return ",".join(sorted(found))
| import re | |
def extract_page_numbers(text: str) -> str:
    """
    Collect the page numbers mentioned in *text*.

    Two phrasings are recognised, case-insensitively:

    * ``page 245`` - a single number after the word "page";
    * ``pages 132, 133 and 134`` - a list after the word "pages", whose
      numbers are separated by a comma, the word "and", ", and", or
      plain whitespace.

    Args:
        text: Free text (for example a transcript) to scan.

    Returns:
        The distinct page numbers in ascending numeric order, joined with
        "," (no spaces); "" when no page reference is found.
    """
    text = text.lower()

    # page 245 / page 197
    matches = re.findall(r"\bpage\s+(\d+)\b", text)

    # pages 132, 133 and 134
    # The list is matched structurally (number, separator, number, ...).
    # A loose character class such as [0-9,\sand] would also accept any run
    # of the letters a/n/d, letting the list run on through following words
    # ("... and add 2 cups") and pick up unrelated numbers.
    separator = r"(?:\s*,\s*(?:and\s*)?|\s*and\s*|\s+)"
    plural_pattern = r"\bpages\s+(\d+(?:" + separator + r"\d+)*)"
    for block in re.findall(plural_pattern, text):
        matches.extend(re.findall(r"\d+", block))

    # De-duplicate and order numerically (not lexically) before joining.
    nums = sorted({int(x) for x in matches})
    return ",".join(str(n) for n in nums)