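"""
smoke_test_rag.py — extended verification script for three GraphRAG scenario types
===================================================================================
Verifies that the service fulfils its purpose as a chatbot that helps users
write job-application motivation statements.

Scenario types:
  1. Specific company    - "What are Kakao's AI service trends?"
  2. Specific technology - "Which companies are developing LLM technology?"
  3. Overall trend       - "Top 3 most active companies in financial AI and
                            their flagship services"

Note: the questions above are examples of each type; the ``__main__`` block
of this file currently runs four company-specific questions.

How to run:
  python3 tests/smoke_test_rag.py
"""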
|
|
import io
import os
import sys
import time

# Put the parent of this file's directory on sys.path so that the
# `src.*` package imported by run_scenario() resolves when the script is
# launched directly.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

# Force UTF-8 on stdout so the Korean text and emoji printed below do not
# fail on consoles whose default encoding cannot represent them.
# reconfigure() changes the encoding in place and keeps the stream's
# existing buffering; wrapping sys.stdout.buffer is kept only as a fallback
# for stream objects that lack reconfigure().
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
elif hasattr(sys.stdout, "buffer"):
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")

import dotenv

# Load variables from a .env file (if any) before os.getenv() is consulted.
dotenv.load_dotenv()
|
|
|
|
| |
def check_graph_structure():
    """Verify that the Neo4j graph is populated and connected well enough to test.

    Connects to ``NEO4J_URI`` (default ``neo4j://localhost:7687``). When both
    ``NEO4J_CLIENT_ID`` and ``NEO4J_CLIENT_SECRET`` are set they are tried
    first; if they are missing or that attempt fails, ``NEO4J_USERNAME`` /
    ``NEO4J_PASSWORD`` are used instead.

    Prints a report of node and relationship counts, per-type entity
    relationship counts, the isolated-node rate (excluding ``Content``
    nodes) and the average number of entity relationships per article.

    The check fails when any label/relationship in the first table has a
    count of zero, when the entity relationship total is zero, or when
    there are fewer than 3.0 entity relationships per article. A high
    isolated-node rate and an empty individual relationship type only
    produce warnings.

    Raises:
        SystemExit: With status 1 when the check fails.
    """
    import neo4j

    isolation_warn_pct = 20       # warn at or above this % of isolated nodes
    min_rels_per_article = 3.0    # recommended minimum relationship density

    uri = os.getenv("NEO4J_URI", "neo4j://localhost:7687")
    client_id = os.getenv("NEO4J_CLIENT_ID")
    client_secret = os.getenv("NEO4J_CLIENT_SECRET")

    driver = None
    if client_id and client_secret:
        candidate = None
        try:
            candidate = neo4j.GraphDatabase.driver(uri, auth=(client_id, client_secret))
            candidate.verify_connectivity()
        except Exception:
            # Deliberate best effort: any failure falls through to the
            # username/password login below. Close the unusable driver so
            # it is not leaked.
            if candidate is not None:
                candidate.close()
        else:
            driver = candidate

    if driver is None:
        username = os.getenv("NEO4J_USERNAME", "neo4j")
        password = os.getenv("NEO4J_PASSWORD", "password")
        driver = neo4j.GraphDatabase.driver(uri, auth=(username, password))
        try:
            driver.verify_connectivity()
        except Exception:
            driver.close()
            raise

    # NOTE(review): the Korean text and status glyphs in this function were
    # reconstructed from a mis-encoded copy of the file; the emoji and the
    # dash/cross symbols in particular should be confirmed against the
    # upstream source.
    print("\n" + "=" * 60)
    print("🔍 [사전 점검] Neo4j 그래프 구성 현황")
    print("=" * 60)

    queries = {
        "Article (기사)": "MATCH (n:Article) RETURN count(n) as cnt",
        "AICompany (기업)": "MATCH (n:AICompany) RETURN count(n) as cnt",
        "AITechnology (기술)": "MATCH (n:AITechnology) RETURN count(n) as cnt",
        "AIService (서비스)": "MATCH (n:AIService) RETURN count(n) as cnt",
        "AIField (분야)": "MATCH (n:AIField) RETURN count(n) as cnt",
        "Content (청크+벡터)": "MATCH (n:Content) RETURN count(n) as cnt",
        "MENTIONS 관계": "MATCH ()-[r:MENTIONS]->() RETURN count(r) as cnt",
        "DEVELOPS 관계": "MATCH ()-[r:DEVELOPS]->() RETURN count(r) as cnt",
    }
    entity_rel_types = ["DEVELOPS", "INVESTS_IN", "PARTNERS_WITH", "APPLIES", "USED_IN", "RELATED_TO"]

    all_ok = True
    try:
        with driver.session() as session:

            def count(cypher):
                """Run a ``RETURN count(...) as cnt`` query and return the number."""
                record = session.run(cypher).single()
                return record["cnt"] if record else 0

            # 1) Every expected node label / relationship must be non-empty.
            for label, cypher in queries.items():
                cnt = count(cypher)
                status = "✅" if cnt > 0 else "⚠️ 비어있음"
                if cnt == 0:
                    all_ok = False
                print(f" {status} {label}: {cnt}개")

            # 2) Direct entity-to-entity relationships, per type.
            print()
            print(" [엔티티 간 직접 관계 연결성 점검]")
            total_entity_rels = 0
            for rel_type in entity_rel_types:
                # The relationship type is interpolated into the query text;
                # the values come only from the fixed list above.
                cnt = count(f"MATCH ()-[r:{rel_type}]->() RETURN count(r) as cnt")
                total_entity_rels += cnt
                status = "✅" if cnt > 0 else "⚠️"
                print(f" {status} {rel_type}: {cnt}개")

            # 3) Isolated nodes (no relationship at all), ignoring Content.
            isolated = count(
                "MATCH (n) WHERE NOT (n)--() AND NOT n:Content RETURN count(n) as cnt"
            )
            total_nodes = count(
                "MATCH (n) WHERE NOT n:Content RETURN count(n) as cnt"
            )
            isolation_rate = (isolated / total_nodes * 100) if total_nodes > 0 else 0
            iso_status = "✅" if isolation_rate < isolation_warn_pct else "⚠️ 고립 노드 과다"
            print(f"\n {iso_status} 고립 노드(Content 제외): {isolated}개 / 전체: {total_nodes}개 ({isolation_rate:.1f}%)")
            print(f" 엔티티 간 직접 관계 합계: {total_entity_rels}개")

            if total_entity_rels == 0:
                print("\n ❌ 엔티티 간 직접 관계(DEVELOPS/APPLIES 등)가 0개입니다. finGraph.py 재실행 필요.")
                all_ok = False

            # 4) Relationship density per article.
            article_cnt = count("MATCH (n:Article) RETURN count(n) as cnt")
            if article_cnt > 0:
                rels_per_article = total_entity_rels / article_cnt
                threshold_ok = rels_per_article >= min_rels_per_article
                t_status = "✅" if threshold_ok else "⚠️ 관계 밀도 부족"
                print(f" {t_status} 기사당 평균 엔티티 관계: {rels_per_article:.1f}개 (권고: 3.0개 이상)")
                if not threshold_ok:
                    all_ok = False
    finally:
        # Release the driver even when a query raises.
        driver.close()

    print()
    if not all_ok:
        print("❌ 일부 노드/관계가 비어있거나 연결성이 부족합니다. finGraph.py 실행으로 그래프를 채워주세요.\n")
        sys.exit(1)
    else:
        print("✅ 그래프 구성 및 연결성 정상 — RAG 테스트를 시작합니다.\n")
|
|
|
|
| |
def run_scenario(label: str, query: str, expected_keywords: list[str]) -> bool:
    """Run one GraphRAG query, print the answer and a simple quality report.

    Args:
        label: Heading printed in the scenario banner.
        query: Question passed to ``graphrag.search(query_text=...)``.
        expected_keywords: Substrings looked for in the answer. Missing
            keywords are reported as a warning only and do not fail the
            scenario.

    Returns:
        True when the answer is longer than 50 characters (after stripping)
        and contains at least one source indicator; False otherwise.
    """
    from src.retrieval.finRetrieval import graphrag

    # NOTE(review): the Korean text and glyphs in this function were
    # reconstructed from a mis-encoded copy of the file; confirm the emoji
    # and the arrow symbol against the upstream source.
    print("=" * 60)
    print(f"📋 시나리오: {label}")
    print(f" 질문: {query}")
    print("=" * 60)

    # perf_counter() is monotonic, so the measured duration cannot be
    # distorted by system clock adjustments.
    started = time.perf_counter()
    result = graphrag.search(query_text=query)
    elapsed = time.perf_counter() - started

    # Treat a missing result or an empty answer as an empty string.
    answer = result.answer if result and result.answer else ""

    print(f"\n📝 GraphRAG 응답 ({elapsed:.1f}초):\n")
    print(answer)

    print("\n🔎 품질 체크:")
    all_pass = True

    # Check 1: minimum answer length.
    answer_length = len(answer.strip())
    if answer_length > 50:
        print(" ✅ 응답 길이 충분 (50자 이상)")
    else:
        print(f" ❌ 응답이 너무 짧음 ({answer_length}자)")
        all_pass = False

    # Check 2: expected keywords (informational; never fails the scenario).
    found, missing = [], []
    for keyword in expected_keywords:
        (found if keyword in answer else missing).append(keyword)
    if found:
        print(f" ✅ 핵심 키워드 포함: {found}")
    if missing:
        print(f" ⚠️ 미포함 키워드: {missing}")

    # Check 3: the answer should mention some kind of source or evidence.
    # NOTE(review): "발표" is a best-effort reconstruction of a partially
    # unreadable literal — verify.
    source_indicators = ["기사", "출처", "뉴스", "보도", "따르면", "발표", "http"]
    has_source = any(indicator in answer for indicator in source_indicators)
    if has_source:
        print(" ✅ 출처/근거 표기 있음")
    else:
        print(" ⚠️ 출처/근거 표기 없음 (RAG 응답이지만 근거가 불명확)")
        all_pass = False

    overall = "✅ PASS" if all_pass else "⚠️ PARTIAL (개선 여지 있음)"
    print(f"\n → 최종 판정: {overall}")
    print()
    return all_pass
|
|
|
|
| |
if __name__ == "__main__":
    # Pre-flight: exits with status 1 when the graph is not ready.
    check_graph_structure()

    # One row per scenario keeps the banner heading, the summary label, the
    # question and its expected keywords together so they cannot drift apart.
    # Columns: (banner heading, summary label, question, expected keywords)
    # NOTE(review): the Korean text was reconstructed from a mis-encoded
    # copy of the file; verify product names (e.g. "쏠") and the " — "
    # separator against the upstream source.
    scenarios = [
        (
            "① 신한은행",
            "① 신한 AI 쏠 포트폴리오",
            "신한은행의 '신한 AI 쏠 포트폴리오' 로보어드바이저 기술과 개인 맞춤형 서비스의 특징을 설명해줘",
            ["신한", "로보어드바이저"],
        ),
        (
            "② 카카오페이",
            "② 카카오페이 AI 신용평가",
            "카카오페이가 씬파일러를 위해 개발한 'AI 대안신용평가' 모델의 장점과 대출 승인 효과는 무엇인가요?",
            ["카카오페이", "대안신용평가"],
        ),
        (
            "③ 토스뱅크",
            "③ 토스 AI FDS",
            "토스뱅크의 실시간 보이스피싱 탐지 기술인 '토스 AI FDS'의 작동 원리와 차단율을 알려줘",
            ["토스", "FDS"],
        ),
        (
            "④ 네이버페이",
            "④ 네이버페이 AI 금융 비서",
            "네이버페이가 출시한 'AI 금융 비서'가 마이데이터와 결합하여 제공하는 맞춤 자산 가이드는 어떤 것인가요?",
            ["네이버페이", "마이데이터"],
        ),
    ]

    results = []
    for heading, _summary, question, keywords in scenarios:
        results.append(
            run_scenario(
                label=f"{heading} — {question}",
                query=question,
                expected_keywords=keywords,
            )
        )

    # Final summary: one line per scenario, then the overall pass count.
    print("=" * 60)
    print("📊 최종 요약")
    print("=" * 60)
    for (_heading, summary, _question, _keywords), passed in zip(scenarios, results):
        print(f" {'✅ PASS' if passed else '⚠️ PARTIAL'} | {summary}")
    print()
    pass_count = sum(results)
    print(f" 총 {pass_count}/{len(results)}개 시나리오 완전 통과")
|
|