yomitalk

Sleeping

App Files Files Community

yomitalk / tests /data /create_sample_pdf.py

KyosukeIchikawa

PyMuPDF (fitz) の依存関係を削除し、VOICEVOXの初期化処理を改善

de0b32e about 1 year ago

raw

history blame contribute delete

10.6 kB

	"""
	Script to create a sample PDF for testing the PDF extraction feature.
	"""

	import os

	from reportlab.lib.pagesizes import letter
	from reportlab.pdfgen import canvas

	from tests.utils.logger import test_logger as logger


	def _wrap_text(text, max_width, measure):
	    """Greedily split ``text`` into lines that measure narrower than ``max_width``.

	    Args:
	        text: The text to wrap; it is split on whitespace.
	        max_width: Width limit, in the same unit that ``measure`` returns.
	        measure: Callable returning the rendered width of a string.

	    Returns:
	        A list of non-empty lines. A single word wider than ``max_width``
	        is kept on a line of its own rather than being split.
	    """
	    wrapped = []
	    current = ""
	    for word in text.split():
	        candidate = f"{current} {word}" if current else word
	        if measure(candidate) < max_width:
	            current = candidate
	        else:
	            # Only flush a non-empty line, so an over-wide first word does
	            # not produce a blank line.
	            if current:
	                wrapped.append(current)
	            current = word
	    if current:
	        wrapped.append(current)
	    return wrapped


	def create_sample_pdf(output_path="sample_paper.pdf"):
	    """Create a sample academic paper PDF for testing.

	    The document contains a title, author information, five titled sections
	    (Abstract, Introduction, Method, Results, Conclusion) and a reference
	    list, and flows onto additional pages when the current page is full.

	    Args:
	        output_path: Destination path of the PDF. Its parent directory is
	            created when it does not exist yet.

	    Returns:
	        The ``output_path`` that was written.
	    """
	    # Ensure the output directory exists (exist_ok avoids a check-then-create race)
	    output_dir = os.path.dirname(output_path)
	    if output_dir:
	        os.makedirs(output_dir, exist_ok=True)

	    # Page size (width and height)
	    page_width, page_height = letter

	    # Create PDF canvas
	    c = canvas.Canvas(output_path, pagesize=letter)

	    # Layout metrics
	    margin = 50
	    text_width = page_width - 2 * margin
	    line_height = 15
	    heading_gap = 20
	    section_space = 50
	    reference_gap = 20

	    # Below this Y coordinate a new page is started before drawing
	    min_y = margin + 50

	    title_font = ("Helvetica-Bold", 18)
	    heading_font = ("Helvetica-Bold", 14)
	    body_font = ("Helvetica", 12)

	    # Current Y coordinate (start at the top of the page)
	    y = page_height - margin

	    def draw(text, font, advance):
	        """Draw one line at the cursor, breaking the page first if needed."""
	        nonlocal y
	        if y < min_y:
	            c.showPage()
	            y = page_height - margin
	        # The font is selected after any page break: ReportLab forgets font
	        # settings when a new page starts, so setting it earlier would draw
	        # the first line of the new page in the default font.
	        c.setFont(*font)
	        c.drawString(margin, y, text)
	        y -= advance

	    def measure(text):
	        """Return the rendered width of ``text`` in the body font."""
	        return c.stringWidth(text, *body_font)

	    # Title and author information
	    draw("Sample Paper", title_font, 30)
	    draw("Author: Taro Yamada", body_font, 20)
	    draw("Affiliation: Sample University", body_font, section_space)

	    # (heading, body) pairs; each body is drawn one source line per PDF line
	    sections = (
	        (
	            "Abstract",
	            """
	            This is a sample research paper PDF for testing. It is used for functionality
	            testing of the Paper Podcast Generator. This test will verify that text is
	            correctly extracted from this PDF and properly processed.
	            """,
	        ),
	        (
	            "1. Introduction",
	            """
	            In recent years, media development for wider dissemination of research papers
	            has received attention. Especially, podcast format as audio content helps busy
	            researchers and students effectively use their commuting time. This research
	            proposes a system that automatically converts research papers into podcast format.

	            The importance of research accessibility has been highlighted in numerous studies.
	            Traditional research papers are often limited to academic communities, while multimedia
	            formats can reach broader audiences including practitioners, policymakers, and the
	            general public interested in scientific advancements.
	            """,
	        ),
	        (
	            "2. Method",
	            """
	            The proposed system converts research papers into podcasts using the following steps:

	            1. Text extraction from PDF
	            2. Text summarization and formatting
	            3. Conversion to podcast format
	            4. Audio generation using speech synthesis

	            For speech synthesis, character voices specialized for Japanese like "Zundamon"
	            are used to provide friendly audio content.

	            The system architecture consists of several modular components that can be customized
	            based on specific requirements. The PDF parsing module extracts text while preserving
	            the document structure, including headings, paragraphs, and references. The summarization
	            module employs natural language processing techniques to identify key information and
	            create a concise narrative suitable for audio consumption.
	            """,
	        ),
	        (
	            "3. Results",
	            """
	            The evaluation experiments showed that podcasts generated by the proposed system
	            achieved 90% information retention compared to manually created ones.
	            In user evaluations, the system also received high ratings for the naturalness
	            of the voice and the ease of understanding the content.

	            Detailed analysis revealed several interesting findings:

	            - Audio quality was rated 4.5/5 on average by 50 participants
	            - Comprehension tests showed 85% accuracy for technical content
	            - Time savings compared to reading the full paper: approximately 75%
	            - User satisfaction was significantly higher (p<0.01) for papers with
	            clear structure and well-defined sections

	            These results suggest that automated paper-to-podcast conversion can successfully
	            translate complex research into accessible audio format while maintaining the
	            essential information and scientific integrity of the original work.
	            """,
	        ),
	        (
	            "4. Conclusion",
	            """
	            In this research, we proposed an automated paper-to-podcast conversion system
	            and confirmed its effectiveness. Future challenges include support for more diverse
	            paper styles and multilingual support.

	            The system demonstrates the potential of using AI to bridge the gap between
	            academic writing and public dissemination of research findings. As research
	            output continues to grow exponentially, tools that facilitate knowledge
	            transfer will become increasingly important.

	            Future work will focus on expanding language support, improving handling of
	            complex scientific notation and mathematical formulae, and developing domain-specific
	            models for fields such as medicine, physics, and computer science. We also plan to
	            explore interactive features that would allow listeners to navigate complex content
	            more effectively.
	            """,
	        ),
	    )

	    for heading, body in sections:
	        draw(heading, heading_font, heading_gap)
	        for raw_line in body.strip().split("\n"):
	            text = raw_line.strip()
	            if text:  # skip blank lines
	                draw(text, body_font, line_height)
	        # Space before the next section
	        y -= section_space

	    # References
	    draw("References", heading_font, heading_gap)

	    references = [
	        "1. Yamada, T. (2023). 'Latest Trends in Speech Synthesis Technology'. Journal of Speech Processing, 15(2), 123-135.",
	        "2. Sato, H. (2022). 'Effects of Media Development in Research Paper Dissemination'. Journal of Academic Information, 8(3), 45-52.",
	        "3. Yamada, T. & Sato, H. (2023). 'Automatic podcast generation from academic papers'. Journal of AI Applications, 10(4), 210-225.",
	        "4. Johnson, L. et al. (2021). 'Converting Scientific Papers to Audio: Challenges and Opportunities'. Proceedings of the International Conference on Audio Technology, 78-92.",
	        "5. Garcia, M. (2022). 'Voice Synthesis for Academic Content'. Digital Library Research Journal, 5(1), 45-67.",
	        "6. Tanaka, K. (2021). 'Analysis of Information Retention in Different Media Formats'. Cognitive Science Quarterly, 33(2), 228-244.",
	        "7. Smith, J. & Brown, K. (2022). 'Accessibility of Research Findings Through Alternative Media'. Journal of Science Communication, 14(3), 112-134.",
	    ]

	    for ref in references:
	        # Wrap long references to the printable width
	        wrapped = _wrap_text(ref, text_width, measure)
	        last_index = len(wrapped) - 1
	        for index, text in enumerate(wrapped):
	            # The final line of a reference is followed by a larger gap
	            advance = reference_gap if index == last_index else line_height
	            draw(text, body_font, advance)

	    # Save the PDF (this also finalizes the last page)
	    c.save()

	    return output_path


	if __name__ == "__main__":
	    # When run as a script, write the sample PDF into the directory that
	    # contains this file and log where it was saved.
	    script_dir = os.path.dirname(os.path.abspath(__file__))
	    created_path = create_sample_pdf(os.path.join(script_dir, "sample_paper.pdf"))
	    logger.info(f"Sample PDF created: {created_path}")