# app.py — Hugging Face Space (commit 633e441, ~4.65 kB)
import gradio as gr
import tensorflow as tf
import numpy as np
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re
# Load models and tokenizers
def load_models():
    """Load both Keras models and their pickled tokenizers from the working directory.

    Returns:
        Tuple of (classifier_model, classifier_tokenizer,
        textgen_model, textgen_tokenizer).
    """
    with open('classifier_tokenizer.pkl', 'rb') as fh:
        cls_tokenizer = pickle.load(fh)
    with open('textgen_tokenizer.pkl', 'rb') as fh:
        gen_tokenizer = pickle.load(fh)
    cls_model = tf.keras.models.load_model('classifier_model.keras')
    gen_model = tf.keras.models.load_model('textgen_model.keras')
    return cls_model, cls_tokenizer, gen_model, gen_tokenizer
# Text cleaning function
def clean_text(text):
    """Normalize raw text before tokenization.

    Steps: strip punctuation other than .,!?, drop standalone numbers,
    lowercase, expand the 'co2' abbreviation, and collapse whitespace.

    Args:
        text: Arbitrary input string.

    Returns:
        The cleaned, lowercased string.
    """
    # Keep word characters, whitespace and basic sentence punctuation only.
    text = re.sub(r'[^\w\s.,!?]', '', text)
    # Remove standalone numbers; digits embedded in words (e.g. 'co2') survive
    # because there is no word boundary inside 'o2'.
    text = re.sub(r'\b\d+\b', '', text)
    # Lowercase BEFORE the abbreviation expansion so 'CO2'/'Co2' are caught too.
    # (The original ran replace('co2', ...) first, missing any capitalized form.)
    text = text.lower()
    text = text.replace('co2', 'carbon dioxide')
    # Collapse runs of whitespace to single spaces and trim the ends.
    return ' '.join(text.split())
# Classification function
def classify_text(text, model, tokenizer):
    """Classify *text* and return ``(label, confidence_percent)``.

    The text is cleaned, tokenized, left-padded to 255 tokens, and fed to the
    classifier; the arg-max class and its probability (scaled to %) come back.
    """
    # NOTE(review): the UI copy mentions Physics/Chemistry/Biology — confirm
    # which label set the classifier was actually trained on.
    labels = ['Science', 'Maths', 'History']
    seq = tokenizer.texts_to_sequences([clean_text(text)])
    padded = pad_sequences(seq, maxlen=255, padding='pre')
    probs = model.predict(padded)[0]
    best = np.argmax(probs)
    return labels[best], probs[best] * 100
# Text generation function
def generate_text(prompt, model, tokenizer, max_length=50, temperature=0.7):
    """Autoregressively generate up to *max_length* words continuing *prompt*.

    Temperature rescales the model's next-word distribution before sampling:
    values < 1 sharpen it, values > 1 flatten it.

    Args:
        prompt: Seed text; it is cleaned and included in the returned string.
        model: Keras next-word model taking a (1, 255) padded token sequence.
        tokenizer: Fitted Keras tokenizer with a ``word_index`` mapping.
        max_length: Maximum number of words to append.
        temperature: Softmax temperature (> 0) used when sampling.

    Returns:
        The cleaned prompt followed by the generated words.
    """
    # Hoist the id -> word lookup out of the loop; the original scanned
    # the whole word_index dict once per generated word (O(V) per step).
    index_to_word = {idx: word for word, idx in tokenizer.word_index.items()}
    input_text = clean_text(prompt)
    for _ in range(max_length):
        token_list = tokenizer.texts_to_sequences([input_text])[0]
        token_list = pad_sequences([token_list], maxlen=255, padding='pre')
        probs = model.predict(token_list, verbose=0)[0]
        # Temperature scaling; the epsilon guards against log(0) -> -inf
        # when the softmax output contains exact zeros (the original crashed
        # into NaN probabilities there). Subtracting the max before exp
        # keeps the re-normalization numerically stable.
        logits = np.log(probs + 1e-10) / temperature
        exp_logits = np.exp(logits - np.max(logits))
        probs = exp_logits / np.sum(exp_logits)
        next_index = np.random.choice(len(probs), p=probs)
        # Index 0 is the padding slot with no word; stop if we sample it
        # (or any id missing from the vocabulary), matching the original
        # early-exit behavior.
        next_word = index_to_word.get(int(next_index), "")
        if not next_word:
            break
        input_text += " " + next_word
    return input_text
# Load models
# Load everything once at import time so both Gradio tabs share the same objects.
classifier_model, classifier_tokenizer, textgen_model, textgen_tokenizer = load_models()
# Create Gradio interface
def classify_interface(text):
    """Gradio handler: classify *text* and format the result for display."""
    label, conf = classify_text(text, classifier_model, classifier_tokenizer)
    return f"Subject: {label} (Confidence: {conf:.2f}%)"
def generate_interface(prompt, length=50, temp=0.7):
    """Gradio handler: generate text from *prompt* with the chosen length and temperature."""
    max_len = int(length)
    temperature = float(temp)
    return generate_text(prompt, textgen_model, textgen_tokenizer,
                         max_length=max_len, temperature=temperature)
# Create tabs for different functionalities
# Build the Gradio UI: one tab per task, sharing the models loaded above.
with gr.Blocks(title="Science Text Analyzer") as demo:
    gr.Markdown("# Science Text Analyzer")
    with gr.Tab("Classify Text"):
        # Label set matches classify_text's classes (Science/Maths/History);
        # the previous copy wrongly advertised Physics/Chemistry/Biology.
        gr.Markdown("## Classify text into Science, Maths, or History")
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Enter scientific text", lines=5)
                classify_button = gr.Button("Classify")
            with gr.Column():
                output = gr.Textbox(label="Classification Result")
        classify_button.click(fn=classify_interface, inputs=text_input, outputs=output)
    with gr.Tab("Generate Text"):
        gr.Markdown("## Generate scientific text based on a prompt")
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(label="Enter a prompt", lines=3)
                length_slider = gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Maximum Length")
                temp_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature (Creativity)")
                generate_button = gr.Button("Generate")
            with gr.Column():
                generated_output = gr.Textbox(label="Generated Text", lines=8)
        generate_button.click(fn=generate_interface, inputs=[prompt_input, length_slider, temp_slider], outputs=generated_output)
    gr.Markdown("### About")
    # Subject names corrected to match the classifier's actual label list.
    gr.Markdown("This app uses deep learning models trained on texts to classify and generate content related to Science, Maths, and History.")

# Launch the app (Hugging Face Spaces executes this module directly).
demo.launch()