# app.py — Hugging Face Space by nnsohamnn (commit 9a3b49f, 7.42 kB).
# (Web-page chrome "raw / history / blame" removed from the scraped copy.)
import gradio as gr
import tensorflow as tf
import keras
import numpy as np
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re
import os
# Custom Perplexity metric, registered so saved models can deserialize it.
@keras.saving.register_keras_serializable(package="Custom")
class Perplexity(keras.metrics.Metric):
    """Perplexity = exp(running mean of sparse categorical cross-entropy)."""

    def __init__(self, name='perplexity', dtype=None, **kwargs):
        super().__init__(name=name, dtype=dtype, **kwargs)
        # Tracks the running mean cross-entropy across batches.
        self.cross_entropy = keras.metrics.Mean(name='cross_entropy')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Fold this batch's cross-entropy into the running mean."""
        batch_ce = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
        self.cross_entropy.update_state(batch_ce, sample_weight)

    def result(self):
        # Exponentiating the mean cross-entropy yields perplexity.
        return tf.exp(self.cross_entropy.result())

    def reset_state(self):
        self.cross_entropy.reset_state()

    def get_config(self):
        # No extra constructor args beyond the base class — nothing to add.
        return super().get_config()
# Text cleaning function
def clean_text(text):
    """Normalize raw text for the models.

    Removes characters outside word chars / whitespace / basic punctuation,
    drops standalone numbers, lowercases, expands 'co2' to 'carbon dioxide',
    and collapses runs of whitespace to single spaces.
    """
    text = re.sub(r'[^\w\s.,!?]', '', text)
    text = re.sub(r'\b\d+\b', '', text)
    # Lowercase BEFORE the co2 substitution so 'CO2'/'Co2' are caught too
    # (the original order ran replace() first and missed uppercase variants).
    text = text.lower()
    text = text.replace('co2', 'carbon dioxide')
    text = ' '.join(text.split())
    return text
# Load models and tokenizers
def load_models():
    """Load both Keras models and their pickled tokenizers from disk.

    Returns a 4-tuple (classifier_model, classifier_tokenizer,
    textgen_model, textgen_tokenizer); prints a diagnostic and re-raises
    on any failure.
    """
    print("Loading models and tokenizers...")
    try:
        # The models were saved with the custom Perplexity metric, so it
        # must be in scope while deserializing.
        with keras.saving.custom_object_scope({'Perplexity': Perplexity}):
            classifier_model = keras.models.load_model('classifier_model.keras')
            textgen_model = keras.models.load_model('textgen_model.keras')
        print("Models loaded successfully with custom objects")
    except Exception as e:
        print(f"Error loading models with custom objects: {e}")
        raise
    try:
        with open('classifier_tokenizer.pkl', 'rb') as f:
            classifier_tokenizer = pickle.load(f)
        with open('textgen_tokenizer.pkl', 'rb') as f:
            textgen_tokenizer = pickle.load(f)
        print("Tokenizers loaded successfully")
    except Exception as e:
        print(f"Error loading tokenizers: {e}")
        raise
    return classifier_model, classifier_tokenizer, textgen_model, textgen_tokenizer
# Classification function
def classify_text(text, model, tokenizer):
    """Predict the subject label and confidence (percent) for `text`."""
    seq = tokenizer.texts_to_sequences([clean_text(text)])
    padded = pad_sequences(seq, maxlen=255, padding='pre')
    probs = model.predict(padded)[0]
    labels = ['Science', 'Maths', 'History']
    best = np.argmax(probs)
    # Winning class probability, expressed as a percentage.
    return labels[best], probs[best] * 100
# Text generation function
def generate_text(prompt, model, tokenizer, max_length=50, temperature=0.7):
    """Autoregressively extend `prompt` by up to `max_length` sampled words.

    Each step feeds the running text through the model, temperature-scales
    the predicted word distribution, and samples the next word. Generation
    stops early if the sampled index has no word in the vocabulary
    (e.g. the padding index 0).
    """
    input_text = clean_text(prompt)
    # Build the index -> word lookup once, instead of scanning
    # tokenizer.word_index (O(vocab)) for every generated token.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    for _ in range(max_length):
        token_list = tokenizer.texts_to_sequences([input_text])[0]
        token_list = pad_sequences([token_list], maxlen=255, padding='pre')
        predicted = model.predict(token_list, verbose=0)[0]
        # Temperature scaling in log space; the epsilon guards against
        # log(0) warnings / -inf for zero-probability classes.
        predicted = np.log(np.asarray(predicted, dtype=np.float64) + 1e-10) / temperature
        exp_preds = np.exp(predicted)
        predicted = exp_preds / np.sum(exp_preds)
        # Sample the next token index from the tempered distribution.
        predicted_index = np.random.choice(len(predicted), p=predicted)
        output_word = index_to_word.get(predicted_index, "")
        if output_word == "":
            break
        input_text += " " + output_word
    return input_text
# Print environment info for debugging
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tf.keras.__version__}")
print(f"Current directory contents: {os.listdir('.')}")
# Load models with error handling
try:
    print("Starting model loading process...")
    # Module-level load: the Gradio callbacks below close over these globals.
    classifier_model, classifier_tokenizer, textgen_model, textgen_tokenizer = load_models()
    print("Models and tokenizers loaded successfully")
except Exception as e:
    print(f"Error in model loading process: {e}")
    # Re-raise so the app fails loudly at startup instead of serving a broken UI.
    raise
# Create Gradio interface functions
def classify_interface(text):
    """Gradio callback: classify `text` and format the result string."""
    label, score = classify_text(text, classifier_model, classifier_tokenizer)
    return f"Subject: {label} (Confidence: {score:.2f}%)"
def generate_interface(prompt, length=50, temp=0.7):
    """Gradio callback: generate text from `prompt` with the given settings."""
    return generate_text(
        prompt,
        textgen_model,
        textgen_tokenizer,
        max_length=int(length),
        temperature=float(temp),
    )
# Create Gradio interface
# Two-tab UI: text classification and text generation, built with gr.Blocks.
with gr.Blocks(title="Science Text Analyzer") as demo:
    gr.Markdown("# Science Text Analyzer")
    with gr.Tab("Classify Text"):
        gr.Markdown("### Classify Academic Text")
        gr.Markdown(
            "This tool automatically classifies a given passage into one of the following academic categories: "
            "**Science**, **Mathematics**, or **History**. Simply enter your text below to see the predicted subject."
        )
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Enter Text", lines=5, placeholder="Paste a sentence or paragraph here...")
                classify_button = gr.Button("Classify")
            with gr.Column():
                output = gr.Textbox(label="Classification Result", placeholder="The predicted subject and confidence will appear here.")
        # Wire the button to the classification callback defined above.
        classify_button.click(fn=classify_interface, inputs=text_input, outputs=output)
    with gr.Tab("Generate Text"):
        gr.Markdown("### Generate Academic Text")
        gr.Markdown(
            "Use this tool to generate educational text based on a given prompt. "
            "You can control the output length and creativity using the sliders below."
        )
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(label="Enter a Prompt", lines=3, placeholder="Type an introductory sentence or concept...")
                # Slider values are passed to generate_interface as (length, temp).
                length_slider = gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Maximum Length (words)")
                temp_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature (Creativity Level)")
                generate_button = gr.Button("Generate")
            with gr.Column():
                generated_output = gr.Textbox(label="Generated Text", lines=8, placeholder="The generated text will appear here.")
        generate_button.click(fn=generate_interface, inputs=[prompt_input, length_slider, temp_slider], outputs=generated_output)
    gr.Markdown("### About This App")
    gr.Markdown(
        "The Science Text Analyzer uses deep learning models trained on academic corpora to classify and generate content "
        "relevant to disciplines such as Science, Mathematics, and History. It combines a classifier with a sequence-based language model "
        "to support educational research and content creation."
    )
# Launch the app
demo.launch()