from encoder import UnifiedEncoder
import os

# Demo script: chunk and embed a local text file with UnifiedEncoder, then
# run a single retrieval query against it and print whatever comes back.

# Configuration
model_name = "nomic-embed-text-v1.5"
text_file = "text.md"

# Question
query = "How has the relationship between Machine Learning and Artificial Intelligence evolved from the 1950s to the 1990s, specifically regarding the shift from symbolic approaches to statistical methods, and what role did neural networks play in this transition?"

# Read the input text *before* constructing the encoder so that a missing file
# is reported immediately instead of after the model has been set up
# (presumably the expensive step -- TODO confirm against UnifiedEncoder).
# Opening directly (EAFP) avoids the check-then-open race of os.path.exists().
try:
    with open(text_file, "r", encoding="utf-8") as f:
        text = f.read()
except FileNotFoundError:
    # SystemExit with a string argument prints the message to stderr and exits
    # with status 1. Unlike the `exit()` helper, it does not depend on the
    # `site` module having been loaded.
    raise SystemExit(f"Error: {text_file} not found.") from None

print(f"Loading model: {model_name}...")
# Initialize encoder from HF Hub
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# "XiaSheng/FreeChunk-nomic" repository to run locally -- confirm the source
# is trusted.
encoder = UnifiedEncoder(
    model_name=model_name,
    model_name_or_path="XiaSheng/FreeChunk-nomic",
    granularities=[2, 4],
    trust_remote_code=True,
)

print(f"Read text length: {len(text)} characters")
print("Source: https://en.wikipedia.org/wiki/Machine_learning")

# Build vector store
print("Building vector store (chunking and encoding)...")
encoder.build_vector_store(text)

# Query
print(f"\nQuerying: {query}")
results = encoder.query(query, top_k=3, aggregation_mode='post')

print("\nResults:")
print("-" * 50)
# The query result is handled as either one string or an iterable of items;
# a string is printed whole rather than being iterated character by character.
if isinstance(results, str):
    print(results)
else:
    for res in results:
        print(res)
print("-" * 50)