"""Demo script: query a local text file through ``UnifiedEncoder``.

Steps, in order: construct the encoder, read ``text_file`` from the current
working directory, hand its contents to ``encoder.build_vector_store``, run a
single ``encoder.query`` call, and print whatever that call returns.
"""
import os

from encoder import UnifiedEncoder

# Configuration
model_name = "nomic-embed-text-v1.5"
text_file = "text.md"

# Question
query = "How has the relationship between Machine Learning and Artificial Intelligence evolved from the 1950s to the 1990s, specifically regarding the shift from symbolic approaches to statistical methods, and what role did neural networks play in this transition?"

print(f"Loading model: {model_name}...")

# Initialize encoder from HF Hub
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# model repository to run locally -- confirm the repository is trusted.
encoder = UnifiedEncoder(
    model_name=model_name,
    model_name_or_path="XiaSheng/FreeChunk-nomic",
    granularities=[2, 4],
    trust_remote_code=True,
)

# Read text
if not os.path.exists(text_file):
    # Raise SystemExit instead of calling the bare `exit()` helper: `exit` is
    # injected by the `site` module for interactive sessions and is not
    # guaranteed to exist (e.g. under `python -S`). A string argument is
    # written to stderr and the process exits with status 1.
    raise SystemExit(f"Error: {text_file} not found.")

with open(text_file, "r", encoding="utf-8") as f:
    text = f.read()

print(f"Read text length: {len(text)} characters")
print("Source: https://en.wikipedia.org/wiki/Machine_learning")

# Build vector store
print("Building vector store (chunking and encoding)...")
encoder.build_vector_store(text)

# Query
print(f"\nQuerying: {query}")
results = encoder.query(query, top_k=3, aggregation_mode='post')

print("\nResults:")
print("-" * 50)
# The query call may hand back either a single string or an iterable of
# results; a string is printed as-is rather than iterated character by
# character.
if isinstance(results, str):
    print(results)
else:
    # NOTE(review): the separator is assumed to follow each individual result;
    # the original indentation was lost -- confirm against the intended output.
    for res in results:
        print(res)
        print("-" * 50)