# financial-rag-chatbot / scripts/create_sample_pdfs.py
# Claude
# Add system verification and sample PDF generation scripts
# 642c18b unverified
"""
Generate sample PDF files for testing.
"""
import os
import textwrap

from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
# Sample finance/economics paper contents.
# Each entry supplies the output filename, the title, the author and the body
# text of one paper. Body lines stay flush-left inside the triple-quoted
# strings because any leading whitespace would become part of the string.
sample_papers = [
    {
        "filename": "financial_crisis_2008.pdf",
        "title": "The 2008 Financial Crisis: Causes and Consequences",
        "author": "John Smith",
        "content": """
The 2008 financial crisis was one of the most severe economic downturns in history.
The crisis originated in the United States housing market, where subprime mortgage lending
practices led to a housing bubble. When the bubble burst, it triggered a chain reaction
throughout the global financial system.
Key causes included:
1. Excessive risk-taking by financial institutions
2. Inadequate regulatory oversight
3. Complex financial instruments (CDOs, MBS)
4. High leverage ratios in investment banks
5. Rating agency failures
The consequences were devastating, including bank failures, government bailouts,
high unemployment, and a global recession. Central banks responded with unprecedented
monetary policy interventions, including quantitative easing and near-zero interest rates.
""",
    },
    {
        "filename": "portfolio_diversification.pdf",
        "title": "Modern Portfolio Theory and Diversification Strategies",
        "author": "Jane Doe",
        "content": """
Portfolio diversification is a fundamental principle in investment management.
The concept, introduced by Harry Markowitz in 1952, suggests that investors can
reduce risk by holding a variety of assets that are not perfectly correlated.
Key principles:
1. Risk reduction through asset allocation
2. Correlation between assets matters
3. Efficient frontier optimization
4. Risk-return tradeoff
5. Systematic vs unsystematic risk
Diversification benefits include:
- Lower portfolio volatility
- More stable returns over time
- Protection against individual asset failures
- Improved risk-adjusted returns (Sharpe ratio)
However, diversification has limits. During market crashes, correlations tend to
increase, reducing diversification benefits. International diversification can
help but is not a complete solution.
""",
    },
    {
        "filename": "central_bank_policy.pdf",
        "title": "Central Bank Monetary Policy and Market Impact",
        "author": "Robert Johnson",
        "content": """
Central banks play a crucial role in modern economies through monetary policy.
The primary tools include interest rate adjustments, open market operations,
and reserve requirements.
Interest rate policy effects:
1. Lower rates stimulate borrowing and investment
2. Higher rates cool down inflation
3. Impact on currency exchange rates
4. Asset price effects (stocks, bonds, real estate)
5. Transmission mechanism through financial markets
Quantitative easing (QE) emerged as a unconventional tool during the 2008 crisis:
- Large-scale asset purchases
- Expansion of central bank balance sheets
- Lowering long-term interest rates
- Supporting credit markets
The effectiveness of monetary policy depends on various factors including
economic conditions, financial market structure, and policy credibility.
""",
    },
    {
        "filename": "behavioral_finance.pdf",
        "title": "Behavioral Finance: Psychological Biases in Investment Decisions",
        "author": "Sarah Williams",
        "content": """
Behavioral finance challenges the traditional assumption of rational investors.
Research shows that psychological biases systematically affect investment decisions.
Common biases:
1. Overconfidence bias - investors overestimate their abilities
2. Anchoring - relying too heavily on initial information
3. Loss aversion - fear of losses exceeds desire for gains
4. Herding behavior - following the crowd
5. Confirmation bias - seeking confirming information
Market implications:
- Stock market bubbles and crashes
- Momentum and reversal effects
- Value premium anomaly
- January effect and other calendar anomalies
Understanding these biases can help investors make better decisions and
potentially exploit market inefficiencies. However, arbitrage is limited
by risk and implementation costs.
""",
    },
    {
        "filename": "esg_investing.pdf",
        "title": "ESG Investing: Sustainable Finance and Performance",
        "author": "Michael Brown",
        "content": """
Environmental, Social, and Governance (ESG) investing has grown rapidly in recent years.
Investors increasingly consider non-financial factors in their investment decisions.
ESG components:
1. Environmental - climate change, pollution, resource depletion
2. Social - labor practices, human rights, community relations
3. Governance - board structure, executive compensation, shareholder rights
Performance evidence is mixed:
- Some studies show ESG funds outperform
- Others find no significant difference
- Selection bias and data quality issues
- Short track record for many ESG strategies
Benefits beyond returns:
- Risk management (avoiding controversies)
- Long-term value creation
- Alignment with values
- Regulatory compliance
Challenges include greenwashing, lack of standardization, and measuring impact.
""",
    },
]
def create_sample_pdf(filepath, title, author, content, *, max_chars=90):
    """Render one sample paper to a letter-size PDF at ``filepath``.

    The title is drawn in 16 pt Helvetica-Bold, the author line in 12 pt
    Helvetica, and the body in 10 pt Helvetica with 15 pt between lines.
    A new page is started whenever the next body line would fall below
    y = 50.

    Args:
        filepath: Destination path of the PDF file.
        title: Heading drawn at the top of the first page.
        author: Name drawn under the title, prefixed with "Author: ".
        content: Body text. Surrounding whitespace is stripped and the text
            is laid out one source line at a time.
        max_chars: Maximum number of characters drawn per physical line
            (must be positive). Source lines longer than this are wrapped
            onto following lines rather than being cut off.
    """
    c = canvas.Canvas(filepath, pagesize=letter)
    _, height = letter

    # Title
    c.setFont("Helvetica-Bold", 16)
    c.drawString(50, height - 50, title)

    # Author
    c.setFont("Helvetica", 12)
    c.drawString(50, height - 80, f"Author: {author}")

    # Body
    c.setFont("Helvetica", 10)
    y_position = height - 120
    for line in content.strip().split('\n'):
        # Wrap instead of truncating so no text is lost. textwrap.wrap()
        # returns [] for a blank line; keep it as one empty row so the
        # vertical spacing of the source text is preserved.
        for segment in textwrap.wrap(line, width=max_chars) or [""]:
            if y_position < 50:  # out of room: start a new page
                c.showPage()
                c.setFont("Helvetica", 10)  # re-select the body font on the new page
                y_position = height - 50
            c.drawString(50, y_position, segment)
            y_position -= 15

    c.save()
    print(f"βœ… 생성됨: {filepath}")
def main():
    """Write every entry of ``sample_papers`` as a PDF under ``data/sample_pdfs``.

    The output directory is created if it does not exist yet. Progress and
    a final summary are printed to stdout.
    """
    out_dir = "data/sample_pdfs"
    divider = "=" * 60

    os.makedirs(out_dir, exist_ok=True)

    print("μƒ˜ν”Œ PDF 생성 쀑...")
    print(divider)

    for entry in sample_papers:
        target = os.path.join(out_dir, entry["filename"])
        create_sample_pdf(target, entry["title"], entry["author"], entry["content"])

    print(divider)
    print(f"βœ… {len(sample_papers)}개 μƒ˜ν”Œ PDF 생성 μ™„λ£Œ!")
    print(f"πŸ“‚ μœ„μΉ˜: {out_dir}")
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()