# vedam_ai / file_handler.py
# vikramvasudevan's picture
# Upload folder using huggingface_hub
# 6992a5d verified
import os
from pathlib import Path
from config import VedamConfig
def write_to_file_and_create_dir(file_path_str, content):
    """
    Save text to a file, building any missing parent folders first.

    Args:
        file_path_str (str): Destination path, file name included.
        content (str): Text to store; an existing file is overwritten.
    """
    target = Path(file_path_str)
    # Ensure the whole directory chain exists; already-present folders are fine.
    target.parent.mkdir(parents=True, exist_ok=True)
    # Mode "w" truncates, so the file ends up holding exactly `content`.
    with target.open(mode="w", encoding="utf-8") as handle:
        handle.write(content)
def _page_number(filename):
    """Return the page number encoded in a page file name, or None.

    Recognises names of the form ``page<N>.txt`` (and bare ``<N>.txt``).
    Anything else -- other extensions, non-numeric stems, hidden files --
    yields None so the caller can skip it instead of crashing.
    """
    suffix = ".txt"
    prefix = "page"
    if not filename.endswith(suffix):
        return None
    # Slice off the exact suffix/prefix; str.strip() would remove a *set of
    # characters* from both ends, which only works by accident.
    stem = filename[: -len(suffix)]
    if stem.startswith(prefix):
        stem = stem[len(prefix):]
    try:
        return int(stem)
    except ValueError:
        return None


def page_text_generator(output_dir: str):
    """
    Yield one record per non-empty page text file in a directory, in page order.

    Files are expected to be named ``page<N>.txt``. Entries in ``output_dir``
    that do not match (other extensions, non-numeric names) are ignored.

    Args:
        output_dir (str): Directory containing the per-page ``.txt`` files.

    Yields:
        dict: ``{"id": str(page), "document": text, "metadata": {...}}`` where
        ``text`` is the file content with surrounding whitespace removed and
        ``metadata`` holds ``page`` (int), ``file`` (file name) and
        ``num_chars`` (length of ``text``). Files that are empty after
        stripping are skipped.
    """
    # Parse every name once and keep only real page files, so a stray entry
    # in the directory cannot break the numeric sort.
    numbered = []
    for filename in os.listdir(output_dir):
        page_num = _page_number(filename)
        if page_num is not None:
            numbered.append((page_num, filename))

    # Sort numerically (page2 before page10); the file name breaks ties so the
    # order does not depend on os.listdir()'s arbitrary ordering.
    for page_num, filename in sorted(numbered):
        file_path = os.path.join(output_dir, filename)
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read().strip()
        if text:
            yield {
                "id": str(page_num),
                "document": text,
                "metadata": {
                    "page": page_num,
                    "file": filename,
                    "num_chars": len(text),
                },
            }