Spaces:

TARAMALIK16
/

moodify_textModel

Sleeping

moodify_textModel / textModel /fix_nltk.py

vancyferns

added textModel files to github repo

2a89678 5 months ago

1.67 kB

	# Run this script to download required NLTK data
	import nltk
	import ssl

	# Handle SSL certificate issues if they occur
	try:
	_create_unverified_https_context = ssl._create_unverified_context
	except AttributeError:
	pass
	else:
	ssl._create_default_https_context = _create_unverified_https_context

	# Download required NLTK data
	print("Downloading NLTK data...")

	try:
	# Download the newer punkt_tab tokenizer
	nltk.download('punkt_tab')
	print("✓ punkt_tab downloaded successfully")
	except:
	print("Failed to download punkt_tab, trying punkt...")
	try:
	nltk.download('punkt')
	print("✓ punkt downloaded successfully")
	except:
	print("Failed to download punkt tokenizer")

	try:
	# Download stopwords
	nltk.download('stopwords')
	print("✓ stopwords downloaded successfully")
	except:
	print("Failed to download stopwords")

	print("NLTK setup complete!")

	# Test the downloads
	try:
	from nltk.tokenize import sent_tokenize
	from nltk.corpus import stopwords

	test_text = "Hello world. This is a test."
	sentences = sent_tokenize(test_text)
	stop_words = set(stopwords.words('english'))

	print(f"\nTesting tokenization:")
	print(f"Input: {test_text}")
	print(f"Sentences: {sentences}")
	print(f"Number of English stopwords: {len(stop_words)}")
	print("\n✓ All NLTK components working correctly!")

	except Exception as e:
	print(f"\n❌ Error testing NLTK components: {e}")
	print("You may need to run the downloads manually in Python:")
	print(">>> import nltk")
	print(">>> nltk.download('punkt_tab')")
	print(">>> nltk.download('stopwords')")