# FinGraph / scripts / plot_keywords.py
# dev-yuje's picture
# refactor: clean up root directory by removing cache, old data, and moving scripts
# 080ff86
# -*- coding: utf-8 -*-
"""
FinGraph keyword visualization utility.

- Copyright: (c) 2026 yujetak / FinGraph Contributors (MIT License)
- Role: Analyzes how often the main AI-related keywords (companies /
  technologies / services) appear across the entire collected news database
  and generates a high-quality bar-chart image (keyword_frequencies.png) to be
  loaded into the left-hand dashboard panel.
"""
import os
import dotenv
import matplotlib.pyplot as plt
import neo4j
import pandas as pd
# Pull environment variables (the NEO4J_* settings read below) from a .env
# file, if one can be found.
dotenv.load_dotenv()

# Korean font setup for Windows environments: 'Malgun Gothic' provides Hangul
# glyphs, and the ASCII hyphen is used for minus signs on the axes.
plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False
def _open_verified_driver(uri: str, auth: tuple) -> neo4j.Driver:
    """Create a driver for *uri*, verify it can connect, and return it.

    The driver is closed before the error is re-raised when verification
    fails, so a failed attempt does not leave a connection pool behind.
    """
    driver = neo4j.GraphDatabase.driver(uri, auth=auth)
    try:
        driver.verify_connectivity()
    except BaseException:
        driver.close()
        raise
    return driver


def get_neo4j_driver() -> neo4j.Driver:
    """Return a connected Neo4j driver configured from environment variables.

    Reads ``NEO4J_URI`` (default ``neo4j://localhost:7687``). When both
    ``NEO4J_CLIENT_ID`` and ``NEO4J_CLIENT_SECRET`` are set they are tried
    first; if that attempt fails for any reason, the function falls back to
    ``NEO4J_USERNAME`` / ``NEO4J_PASSWORD`` (defaults ``neo4j`` /
    ``password``).

    Returns:
        A driver whose connectivity has already been verified. The caller is
        responsible for closing it.

    Raises:
        Whatever the neo4j driver raises when the username/password attempt
        cannot be created or verified.
    """
    uri = os.getenv("NEO4J_URI", "neo4j://localhost:7687")
    client_id = os.getenv("NEO4J_CLIENT_ID")
    client_secret = os.getenv("NEO4J_CLIENT_SECRET")

    if client_id and client_secret:
        try:
            return _open_verified_driver(uri, (client_id, client_secret))
        except Exception as exc:
            # Deliberate best-effort: keep going with username/password, but
            # say so instead of hiding the failure.
            print(
                "Neo4j client-credential login failed "
                f"({type(exc).__name__}); falling back to username/password."
            )

    username = os.getenv("NEO4J_USERNAME", "neo4j")
    password = os.getenv("NEO4J_PASSWORD", "password")
    return _open_verified_driver(uri, (username, password))
def create_keyword_plot():
    """Plot the 20 most-mentioned keywords and save them as a PNG bar chart.

    Queries Neo4j for nodes reached from ``Article`` nodes through a
    ``MENTIONS`` relationship (excluding nodes labelled ``Content``), counts
    the matches per ``n.name``, and draws the top 20 as a horizontal bar chart
    saved to ``keyword_frequencies.png`` in the current working directory.

    When the query yields no rows, a notice is printed and no file is
    written. Connection and query errors propagate to the caller; the driver
    and the figure are released either way.
    """
    # NOTE(review): rows whose n.name is null are not filtered out here;
    # confirm every mentioned node carries a name.
    query = """
    MATCH (a:Article)-[:MENTIONS]->(n)
    WHERE NOT n:Content
    RETURN n.name AS keyword, count(a) AS freq
    ORDER BY freq DESC
    LIMIT 20
    """

    driver = get_neo4j_driver()
    try:
        with driver.session() as session:
            # Materialize inside the session: the result cannot be consumed
            # after the session is closed.
            data = [dict(record) for record in session.run(query)]
    finally:
        # Close the driver even when the query fails.
        driver.close()

    if not data:
        # "There is no keyword data."
        print("키워드 데이터가 없습니다.")
        return

    df = pd.DataFrame(data)

    # barh draws the first row at the bottom, so reverse the descending
    # result to put the most frequent keyword at the top.
    ordered = df.iloc[::-1]

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        bars = ax.barh(ordered['keyword'], ordered['freq'], color='#3b5a82')

        # "Occurrence frequency (number of related articles)"
        ax.set_xlabel('출현 빈도 (관련 기사 수)', fontsize=12)
        # "Keyword (company / technology / service)"
        ax.set_ylabel('키워드 (기업/기술/서비스)', fontsize=12)
        # "Occurrence frequency of the top 20 AI-related keywords"
        ax.set_title('상위 20개 AI 관련 키워드 출현 빈도', fontsize=16, fontweight='bold')

        # Print each bar's count just past its right-hand end.
        for bar in bars:
            width = bar.get_width()
            ax.text(width + 0.1, bar.get_y() + bar.get_height() / 2, f'{int(width)}',
                    ha='left', va='center', fontsize=10)

        fig.tight_layout()

        output_path = 'keyword_frequencies.png'
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    print(f"Graph successfully saved to {output_path}")
# Script entry point: build the chart only when this file is run directly,
# not when it is imported.
if __name__ == "__main__":
    create_keyword_plot()