"""Example script: index ``text.md`` with ``UnifiedEncoder`` and run one query.

The script loads the encoder, reads the text file, builds the vector store
from its contents, queries it with a single question and prints whatever
``encoder.query`` returns.
"""

import os
import sys

from encoder import UnifiedEncoder

# Configuration
model_name = "nomic-embed-text-v1.5"
text_file = "text.md"

# Question
query = (
    "How has the relationship between Machine Learning and Artificial "
    "Intelligence evolved from the 1950s to the 1990s, specifically regarding "
    "the shift from symbolic approaches to statistical methods, and what role "
    "did neural networks play in this transition?"
)

print(f"Loading model: {model_name}...")

# Initialize encoder from HF Hub
encoder = UnifiedEncoder(
    model_name=model_name,
    model_name_or_path="XiaSheng/FreeChunk-nomic",
    granularities=[2, 4],
    trust_remote_code=True,
)

# Read text
if not os.path.exists(text_file):
    print(f"Error: {text_file} not found.")
    # sys.exit instead of the bare exit() helper: exit() is injected by the
    # `site` module and is missing under `python -S` or in frozen builds.
    sys.exit(1)

with open(text_file, "r", encoding="utf-8") as f:
    text = f.read()

print(f"Read text length: {len(text)} characters")
print("Source: https://en.wikipedia.org/wiki/Machine_learning")

# Build vector store
print("Building vector store (chunking and encoding)...")
encoder.build_vector_store(text)

# Query
print(f"\nQuerying: {query}")
results = encoder.query(query, top_k=3, aggregation_mode='post')

print("\nResults:")
print("-" * 50)
# query() may hand back a single string or an iterable of results; both
# shapes are handled explicitly here.
if isinstance(results, str):
    print(results)
else:
    for res in results:
        print(res)
# NOTE(review): the original indentation was lost; this closing rule is
# assumed to follow both branches rather than each result - confirm.
print("-" * 50)