Initial upload of FreeChunk model with custom code

76cd6b1 verified 27 days ago

1.27 kB

	from encoder import UnifiedEncoder
	import os

	# Example script: load the encoder, index the contents of `text_file`, run a
	# single query against the resulting vector store and print the results.

	# Configuration
	model_name = "nomic-embed-text-v1.5"
	text_file = "text.md"

	# Question
	query = "How has the relationship between Machine Learning and Artificial Intelligence evolved from the 1950s to the 1990s, specifically regarding the shift from symbolic approaches to statistical methods, and what role did neural networks play in this transition?"

	print(f"Loading model: {model_name}...")
	# Initialize encoder from HF Hub
	# NOTE(review): trust_remote_code=True presumably allows code shipped in the
	# Hub repository to be executed locally -- confirm the repository is trusted.
	encoder = UnifiedEncoder(
		model_name=model_name,
		model_name_or_path="XiaSheng/FreeChunk-nomic",
		granularities=[2, 4],
		trust_remote_code=True,
	)

	# Read text; stop with a non-zero exit status when the input file is missing.
	if not os.path.exists(text_file):
		print(f"Error: {text_file} not found.")
		# Raise SystemExit directly: the bare exit() helper is injected by the
		# `site` module for interactive use and is not guaranteed to exist
		# (e.g. under `python -S`).
		raise SystemExit(1)

	with open(text_file, "r", encoding="utf-8") as f:
		text = f.read()

	print(f"Read text length: {len(text)} characters")
	print("Source: https://en.wikipedia.org/wiki/Machine_learning")

	# Build vector store
	print("Building vector store (chunking and encoding)...")
	encoder.build_vector_store(text)

	# Query
	print(f"\nQuerying: {query}")
	results = encoder.query(query, top_k=3, aggregation_mode='post')

	print("\nResults:")
	print("-" * 50)
	if isinstance(results, str):
		# A plain string result is printed as-is rather than iterated
		# character by character.
		print(results)
	else:
		# Otherwise print each result followed by a separator line.
		for res in results:
			print(res)
			print("-" * 50)