# Spaces: Runtime error
# File size: 1,430 Bytes
# Revisions: 7d217f1, 6992a5d
import os
from pathlib import Path
from config import VedamConfig
def write_to_file_and_create_dir(file_path_str, content):
    """
    Save text to a file, building any missing parent folders on the way.

    Args:
        file_path_str (str): Destination path, file name included.
        content (str): Text to store; an existing file is overwritten.
    """
    target = Path(file_path_str)
    # Make sure the whole directory chain is present before opening the file.
    target.parent.mkdir(parents=True, exist_ok=True)
    # "w" mode truncates, so earlier contents are replaced rather than appended.
    with target.open("w", encoding="utf-8") as handle:
        handle.write(content)
def _page_number(filename):
    """Return the page number encoded in *filename*, or None if it has none.

    Recognised names are ``page<N>.txt`` and ``<N>.txt`` where ``<N>`` consists
    of decimal digits only. Anything else (other extensions, non-numeric
    stems, hidden files) yields None.
    """
    suffix, prefix = ".txt", "page"
    if not filename.endswith(suffix):
        return None
    stem = filename[: -len(suffix)]
    if stem.startswith(prefix):
        stem = stem[len(prefix):]
    # isdecimal() is False for "" and for every string int() would reject,
    # so the conversion below cannot raise.
    return int(stem) if stem.isdecimal() else None


def page_text_generator(output_dir: str):
    """
    Yield one record per non-empty page text file in *output_dir*.

    Files are visited in ascending page-number order (ties broken by file
    name). Directory entries that are not page files -- wrong extension or no
    parsable page number -- are skipped instead of aborting the whole run.
    Pages whose content is empty after stripping whitespace are skipped too.

    Args:
        output_dir (str): Directory containing ``page<N>.txt`` files.

    Yields:
        dict: ``{"id": str, "document": str, "metadata": {"page": int,
        "file": str, "num_chars": int}}`` where ``document`` is the stripped
        file content and ``num_chars`` is its length.

    Raises:
        OSError: If *output_dir* cannot be listed or a page file cannot be
            read (raised lazily, on iteration).
    """
    # Parse and filter BEFORE sorting: the sort key must never see a name
    # that has no page number, otherwise one stray file breaks everything.
    numbered = []
    for filename in os.listdir(output_dir):
        page_num = _page_number(filename)
        if page_num is not None:
            numbered.append((page_num, filename))
    numbered.sort()

    for page_num, filename in numbered:
        file_path = os.path.join(output_dir, filename)
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read().strip()
        if not text:
            continue
        yield {
            "id": str(page_num),
            "document": text,
            "metadata": {
                "page": page_num,
                "file": filename,
                "num_chars": len(text),
            },
        }
|