Spaces:
Paused
Paused
# local_cpu_test.py - RAG test script for a local CPU environment
"""
Script for testing the RAG system in a local CPU environment.
For local verification before deploying to a Hugging Face Space.
"""
| import os | |
| import sys | |
| import time | |
| from datetime import datetime | |
# Remove the SPACE_ID environment variable, if it is set, before any project
# module is imported (presumably so the run is treated as local rather than
# as a Hugging Face Space -- confirm against how config derives its flag).
if 'SPACE_ID' in os.environ:
    del os.environ['SPACE_ID']
def test_imports():
    """Check that the third-party packages used by the RAG stack can be loaded.

    Loads torch, sentence_transformers, faiss and sklearn in that order and
    prints a status line for each (plus CUDA availability for torch). The
    check stops at the first package that cannot be loaded.

    Any exception raised while loading or probing a package is reported as a
    failure, not only ImportError, so that e.g. an OSError from a native
    library that fails to load is printed as a failed check instead of
    escaping as a traceback when this function is run on its own.

    Returns:
        bool: True if all four packages loaded, False at the first failure.
    """
    # Function-local import, matching the file's habit of importing inside checks.
    import importlib

    print("=" * 50)
    print("๋ผ์ด๋ธ๋ฌ๋ฆฌ import ํ ์คํธ")
    print("=" * 50)

    def _version(mod):
        # Fall back instead of raising when a build exposes no __version__.
        return getattr(mod, '__version__', 'unknown')

    def _torch_lines(mod):
        # Query CUDA once and reuse the answer for both status lines.
        cuda_ok = mod.cuda.is_available()
        return [
            f"โ PyTorch {_version(mod)}",
            f" CUDA ์ฌ์ฉ ๊ฐ๋ฅ: {cuda_ok}",
            f" ๋๋ฐ์ด์ค: {'cuda' if cuda_ok else 'cpu'}",
        ]

    # (module to import, label used in the failure message, status-line builder)
    probes = [
        ("torch", "PyTorch", _torch_lines),
        ("sentence_transformers", "sentence-transformers",
         lambda mod: [f"โ sentence-transformers {_version(mod)}"]),
        ("faiss", "FAISS", lambda _mod: ["โ FAISS ์ฌ์ฉ ๊ฐ๋ฅ"]),
        ("sklearn", "scikit-learn",
         lambda mod: [f"โ scikit-learn {_version(mod)}"]),
    ]

    for module_name, label, describe in probes:
        try:
            status_lines = describe(importlib.import_module(module_name))
        except Exception as e:
            print(f"โ {label} import ์คํจ: {e}")
            return False
        for line in status_lines:
            print(line)

    print("โ ๋ชจ๋ ๋ผ์ด๋ธ๋ฌ๋ฆฌ import ์ฑ๊ณต")
    return True
def test_config():
    """Load the project configuration and print the settings the RAG system uses.

    Imports RAG_CONFIG and IS_HUGGINGFACE_SPACE from the config module and
    prints the environment flag, the first embedding model, batch size,
    top-k, similarity threshold and hybrid weights.

    Returns:
        bool: True if every value could be read and printed, False if the
        import or any lookup raised.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("์ค์ ํ ์คํธ")
    print(rule)

    try:
        from config import RAG_CONFIG as settings, IS_HUGGINGFACE_SPACE as on_hf_space

        print(f"ํ๊น ํ์ด์ค ํ๊ฒฝ: {on_hf_space}")
        print(f"์๋ฒ ๋ฉ ๋ชจ๋ธ: {settings['embedding_models'][0]}")
        # The remaining settings are plain top-level keys; print them in order,
        # one lookup at a time, so a missing key stops the output right there.
        plain_keys = (
            ("๋ฐฐ์น ํฌ๊ธฐ", 'batch_size'),
            ("Top-K", 'top_k'),
            ("์๊ณ๊ฐ", 'similarity_threshold'),
            ("ํ์ด๋ธ๋ฆฌ๋ ๊ฐ์ค์น", 'hybrid_weights'),
        )
        for label, key in plain_keys:
            print(f"{label}: {settings[key]}")
    except Exception as e:
        print(f"์ค์ ๋ก๋ ์คํจ: {e}")
        return False
    return True
def test_embedding_model():
    """Load a sentence-embedding model on CPU and encode two sample sentences.

    Builds a SentenceTransformer for a fixed model name with device='cpu' and
    cache_folder='./model_cache', encodes two sample texts, and prints the
    load time, the encode time and the shape of the resulting embeddings.

    This check does not read the project configuration, so it no longer
    imports it: a missing or broken config module must not fail the
    embedding check.

    Returns:
        bool: True if the model loaded and encoding succeeded, False if any
        step raised.
    """
    print("\n" + "=" * 50)
    print("์๋ฒ ๋ฉ ๋ชจ๋ธ ํ ์คํธ")
    print("=" * 50)
    try:
        from sentence_transformers import SentenceTransformer

        # Fixed model name rather than one taken from configuration (the
        # original note describes it as the lightest model to test with).
        model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
        print(f"๋ชจ๋ธ ๋ก๋ฉ: {model_name}")
        start_time = time.time()
        model = SentenceTransformer(
            model_name,
            device='cpu',  # force CPU explicitly
            cache_folder='./model_cache',
        )
        load_time = time.time() - start_time
        print(f"โ ๋ชจ๋ธ ๋ก๋ ์๋ฃ ({load_time:.2f}์ด)")

        # Small encoding smoke test on two sample sentences.
        test_texts = ["์ทจ๋์ธ์จ์ด ์ผ๋ง์ธ๊ฐ์?", "์ฃผํ ๊ตฌ์ ์ ์ธ๊ธ์?"]
        start_time = time.time()
        embeddings = model.encode(test_texts, convert_to_numpy=True)
        encode_time = time.time() - start_time
        print(f"โ ์๋ฒ ๋ฉ ์์ฑ ์๋ฃ ({encode_time:.2f}์ด)")
        print(f" ์๋ฒ ๋ฉ ํํ: {embeddings.shape}")
        return True
    except Exception as e:
        print(f"์๋ฒ ๋ฉ ๋ชจ๋ธ ํ ์คํธ ์คํจ: {e}")
        return False
def test_law_fetcher():
    """Instantiate the law fetcher and print what its cache currently holds.

    Creates an HFLawAPIFetcher, prints its cache directory and the number of
    entries in cache_info, and, when there are entries, one line per entry
    with its 'data_size' (shown in KB) and the first ten characters of its
    'cached_at' value.

    Returns:
        bool: True if the fetcher was created and its cache info printed,
        False if anything raised.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("๋ฒ๋ น ํ์ฒ ํ ์คํธ")
    print(rule)

    try:
        from law_fetcher import HFLawAPIFetcher

        law_fetcher = HFLawAPIFetcher()
        print("โ ๋ฒ๋ น ํ์ฒ ์ด๊ธฐํ ์๋ฃ")
        print(f" ์บ์ ๋๋ ํ ๋ฆฌ: {law_fetcher.cache_dir}")
        cache = law_fetcher.cache_info
        print(f" ์บ์๋ ๋ฒ๋ น: {len(cache)}๊ฐ")

        # Nothing cached: the summary lines above are all there is to show.
        if not cache:
            return True

        print(" ์บ์ ์ ๋ณด:")
        for name, meta in cache.items():
            stamp = meta.get('cached_at', 'Unknown')
            size_bytes = meta.get('data_size', 0)
            # Only the first ten characters of the timestamp are shown.
            print(f" - {name}: {size_bytes/1024:.1f}KB ({stamp[:10]})")
        return True
    except Exception as e:
        print(f"๋ฒ๋ น ํ์ฒ ํ ์คํธ ์คํจ: {e}")
        return False
def test_rag_system_minimal():
    """Initialise the RAG system and, if an index is present, run one search.

    Creates an HFSpacesTaxRAG, prints the initialisation time, the detected
    environment, the device and the embedding dimension. When both vector_db
    and documents are truthy it also prints their sizes, runs a single
    search with top_k=2 and prints each hit's 'hybrid_score' and the first
    50 characters of its 'document'. Otherwise it prints a notice that no
    vector DB exists and still counts as a pass.

    Returns:
        bool: True if no step raised, False otherwise (the traceback is
        printed in that case).
    """
    rule = "=" * 50
    print("\n" + rule)
    print("RAG ์์คํ ์ต์ ํ ์คํธ")
    print(rule)

    try:
        from rag_system import HFSpacesTaxRAG

        print("RAG ์์คํ ์ด๊ธฐํ ์ค...")
        began = time.time()
        system = HFSpacesTaxRAG()
        elapsed = time.time() - began
        print(f"โ RAG ์์คํ ์ด๊ธฐํ ์๋ฃ ({elapsed:.2f}์ด)")

        env_label = 'ํ๊น ํ์ด์ค' if system.is_huggingface_space else '๋ก์ปฌ'
        print(f" ํ๊ฒฝ: {env_label}")
        print(f" ๋๋ฐ์ด์ค: {system.device}")
        dimension = system.embedding_model.get_sentence_embedding_dimension()
        print(f" ์๋ฒ ๋ฉ ๋ชจ๋ธ: {dimension}์ฐจ์")

        # Without both an index and documents there is nothing to search;
        # report that and treat the check as passed.
        if not (system.vector_db and system.documents):
            print(" ๋ฒกํฐ DB ์์ - ์์คํ ๊ตฌ์ถ ํ์")
            return True

        print(f" ๋ฒกํฐ DB: {system.vector_db.ntotal}๊ฐ ๋ฒกํฐ")
        print(f" ๋ฌธ์ ์: {len(system.documents)}๊ฐ")

        # One small search as a smoke test.
        query = "์ทจ๋์ธ ์ธ์จ"
        print(f"\n๊ฒ์ ํ ์คํธ: '{query}'")
        began = time.time()
        hits = system.search(query, top_k=2)
        elapsed = time.time() - began
        print(f"โ ๊ฒ์ ์๋ฃ ({elapsed:.2f}์ด)")
        print(f" ๊ฒฐ๊ณผ: {len(hits)}๊ฐ ๋ฌธ์")
        for rank, hit in enumerate(hits, start=1):
            score = hit['hybrid_score']
            preview = hit['document'][:50]
            print(f" {rank}. ์ ์: {score:.3f} - {preview}...")
        return True
    except Exception as e:
        print(f"RAG ์์คํ ํ ์คํธ ์คํจ: {e}")
        import traceback
        traceback.print_exc()
        return False
def run_full_test():
    """Run every check in sequence and print a pass/fail summary.

    Runs the import, config, embedding-model, law-fetcher and RAG-system
    checks in that order. A check that returns a falsy value or raises is
    counted as failed and the run continues; a KeyboardInterrupt stops the
    run early but the summary is still printed.

    Returns:
        bool: True only if every check in the list passed.
    """
    print("๋ก์ปฌ CPU ํ๊ฒฝ RAG ์์คํ ํ ์คํธ")
    print(f"์คํ ์๊ฐ: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Python ๋ฒ์ : {sys.version}")

    suite = [
        ("๋ผ์ด๋ธ๋ฌ๋ฆฌ Import", test_imports),
        ("์ค์ ", test_config),
        ("์๋ฒ ๋ฉ ๋ชจ๋ธ", test_embedding_model),
        ("๋ฒ๋ น ํ์ฒ", test_law_fetcher),
        ("RAG ์์คํ ", test_rag_system_minimal),
    ]
    rule = '=' * 60
    passed = 0
    began = time.time()

    for label, check in suite:
        try:
            print(f"\n{rule}")
            print(f"ํ ์คํธ: {label}")
            print(rule)
            if not check():
                print(f"โ {label} ํ ์คํธ ์คํจ")
                continue
            passed += 1
            print(f"โ {label} ํ ์คํธ ์ฑ๊ณต")
        except KeyboardInterrupt:
            # User abort: stop running checks but still print the summary.
            print("\n์ฌ์ฉ์์ ์ํด ํ ์คํธ ์ค๋จ")
            break
        except Exception as e:
            print(f"โ {label} ํ ์คํธ ์ค ์ค๋ฅ: {e}")

    elapsed = time.time() - began
    print(f"\n{rule}")
    print("ํ ์คํธ ๊ฒฐ๊ณผ ์์ฝ")
    print(rule)
    print(f"์ฑ๊ณต: {passed}/{len(suite)}")
    print(f"์ด ์์์๊ฐ: {elapsed:.2f}์ด")

    all_passed = passed == len(suite)
    if all_passed:
        print("โ ๋ชจ๋ ํ ์คํธ ํต๊ณผ - ํ๊น ํ์ด์ค ๋ฐฐํฌ ์ค๋น ์๋ฃ")
    else:
        print("โ ์ผ๋ถ ํ ์คํธ ์คํจ - ๋ฌธ์ ํด๊ฒฐ ํ ์ฌ์๋")
    return all_passed
if __name__ == "__main__":
    # Command-line handling: an optional first argument selects one check;
    # with no argument the full suite runs. The process exit code is 0 when
    # the selected run passed and 1 otherwise (including an unknown flag).
    #
    # Every check defined in this file is selectable here; '--law' was added
    # because the law-fetcher check previously had no flag of its own.
    single_checks = {
        '--imports': test_imports,
        '--config': test_config,
        '--embedding': test_embedding_model,
        '--law': test_law_fetcher,
        '--rag': test_rag_system_minimal,
    }

    if len(sys.argv) > 1:
        selected = single_checks.get(sys.argv[1])
        if selected is None:
            # Usage text is built from the table so it cannot drift from it.
            print(f"์ฌ์ฉ๋ฒ: python local_cpu_test.py [{'|'.join(single_checks)}]")
            sys.exit(1)
        success = selected()
    else:
        success = run_full_test()

    sys.exit(0 if success else 1)