Spaces:

tktm8
/

EmpathemePotionBot

Running

EmpathemePotionBot / src /processing /metadata_utils.py

Upload 59 files

fb05e78 verified 3 months ago

1.12 kB

	"""メタデータ生成のユーティリティ関数"""

	from urllib.parse import urlparse


	def generate_file_name(url: str, index: int) -> str:
	    """Build a Markdown file name of the form ``potion_XXX.md`` for a URL.

	    The last segment of the URL path is used as the document ID when it
	    consists solely of digits; otherwise ``index`` is used. The ID is
	    left-padded with zeros to at least three characters.

	    Args:
	        url: URL being processed.
	        index: Index number of the URL, used as the fallback ID.

	    Returns:
	        The generated file name (for example ``potion_001.md``).
	    """
	    # Only the final path segment matters; surrounding slashes are ignored.
	    last_segment = urlparse(url).path.strip('/').rsplit('/', 1)[-1]
	    raw_id = last_segment if last_segment.isdigit() else str(index)
	    doc_id = raw_id.zfill(3)
	    return f"potion_{doc_id}.md"


	def extract_title(content: str) -> str:
	    """Extract the title from Markdown content.

	    The title is taken from the first line that starts with ``"# "``
	    (a level-one heading). Only that leading marker is removed, so any
	    later ``"# "`` sequences inside the heading text (e.g. ``"C# in depth"``)
	    are preserved. Surrounding whitespace is stripped from the result.

	    Args:
	        content: Markdown content.

	    Returns:
	        The extracted title, or ``"Untitled"`` when no line starts
	        with ``"# "``.
	    """
	    heading_marker = '# '
	    for line in content.split('\n'):
	        if line.startswith(heading_marker):
	            # Slice off the prefix instead of using str.replace, which
	            # would also delete every later "# " within the title text.
	            return line[len(heading_marker):].strip()
	    return "Untitled"