Spaces:
Sleeping
Sleeping
File size: 9,616 Bytes
35b7ac6 f24c4ab 633e441 35b7ac6 633e441 f24c4ab 633e441 f24c4ab 633e441 f24c4ab 633e441 f24c4ab 633e441 f24c4ab 633e441 35b7ac6 633e441 35b7ac6 633e441 f24c4ab 633e441 f24c4ab 633e441 043812c 9886933 d740850 9886933 043812c f24c4ab 633e441 9a3b49f e723247 9a3b49f 633e441 373753a 633e441 9a3b49f 043812c 633e441 9a3b49f ae4db81 633e441 9a3b49f 7280a44 9a3b49f 633e441 9a3b49f 043812c 633e441 9a3b49f 373753a e723247 043812c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
import gradio as gr
import tensorflow as tf
import keras
import numpy as np
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re
import os
# Define and register the custom Perplexity metric
@keras.saving.register_keras_serializable(package="Custom")
class Perplexity(keras.metrics.Metric):
    """Streaming perplexity metric: exp of the running mean cross-entropy.

    Registered with Keras serialization (package "Custom") so that the
    saved ``.keras`` model files that were compiled with this metric can
    be deserialized by name at load time.
    """

    def __init__(self, name='perplexity', dtype=None, **kwargs):
        super().__init__(name=name, dtype=dtype, **kwargs)
        # Running mean of per-sample cross-entropy values across batches.
        self.cross_entropy = keras.metrics.Mean(name='cross_entropy')

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Calculate cross-entropy.
        # Sparse variant: y_true holds integer class ids, y_pred holds
        # per-class probabilities — assumes the model outputs softmax
        # probabilities, not logits (TODO confirm against training code).
        cross_entropy_values = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
        # Update the internal mean metric with this batch's values.
        self.cross_entropy.update_state(cross_entropy_values, sample_weight)

    def result(self):
        # Perplexity is the exponential of the (mean) cross-entropy.
        return tf.exp(self.cross_entropy.result())

    def reset_state(self):
        # Reset the wrapped mean so the metric starts fresh each epoch.
        self.cross_entropy.reset_state()

    def get_config(self):
        # No constructor arguments beyond the base class, so the base
        # config is sufficient for serialization round-trips.
        config = super().get_config()
        return config
# Text cleaning function
def clean_text(text):
    """Normalize raw text for tokenization.

    Steps: strip every character except word chars, whitespace and basic
    punctuation (. , ! ?); lowercase; expand the token 'co2' to
    'carbon dioxide'; remove standalone numbers; collapse whitespace.

    Args:
        text: Raw input string.

    Returns:
        The cleaned, lowercased string.
    """
    # Keep only word characters, whitespace and . , ! ?
    text = re.sub(r'[^\w\s.,!?]', '', text)
    # Lowercase BEFORE the co2 substitution so the common spellings
    # 'CO2'/'Co2' are expanded too (the original lowered afterwards,
    # so only an already-lowercase 'co2' was ever replaced).
    text = text.lower()
    text = text.replace('co2', 'carbon dioxide')
    # Drop standalone numbers; digits embedded in words are kept
    # (\b\d+\b does not match the '2' inside 'co2', for example).
    text = re.sub(r'\b\d+\b', '', text)
    # Collapse runs of whitespace to single spaces and trim the ends.
    return ' '.join(text.split())
# Load models and tokenizers
def load_models():
    """Load both Keras models and their pickled tokenizers from disk.

    The saved models reference the custom ``Perplexity`` metric, so it
    is supplied via a custom-object scope during deserialization.

    Returns:
        Tuple of (classifier_model, classifier_tokenizer,
        textgen_model, textgen_tokenizer).

    Raises:
        Exception: re-raised after logging if any file fails to load.
    """
    print("Loading models and tokenizers...")

    # Load models with custom objects for Perplexity.
    custom_objects = {'Perplexity': Perplexity}
    try:
        with keras.saving.custom_object_scope(custom_objects):
            classifier_model = keras.models.load_model('classifier_model.keras')
            textgen_model = keras.models.load_model('textgen_model.keras')
            print("Models loaded successfully with custom objects")
    except Exception as e:
        print(f"Error loading models with custom objects: {e}")
        raise

    # Small local helper so both tokenizer files are read identically.
    def _unpickle(path):
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    try:
        classifier_tokenizer = _unpickle('classifier_tokenizer.pkl')
        textgen_tokenizer = _unpickle('textgen_tokenizer.pkl')
        print("Tokenizers loaded successfully")
    except Exception as e:
        print(f"Error loading tokenizers: {e}")
        raise

    return classifier_model, classifier_tokenizer, textgen_model, textgen_tokenizer
# Classification function
def classify_text(text, model, tokenizer):
    """Classify a piece of text into one of three academic subjects.

    Args:
        text: Raw input text; cleaned and tokenized internally.
        model: Keras classifier producing per-class probabilities.
        tokenizer: Fitted Keras tokenizer matching the classifier.

    Returns:
        Tuple of (subject_name, confidence_percentage).
    """
    # Label order assumed to match the training labels — TODO confirm.
    classes = ['Science', 'Maths', 'History']
    # Preprocess exactly as at training time: clean, tokenize, left-pad
    # to the fixed sequence length of 255.
    padded = pad_sequences(
        tokenizer.texts_to_sequences([clean_text(text)]),
        maxlen=255,
        padding='pre',
    )
    probabilities = model.predict(padded)[0]
    # Pick the highest-probability class and report it as a percentage.
    best = int(np.argmax(probabilities))
    return classes[best], probabilities[best] * 100
# Text generation function
def generate_text(prompt, model, tokenizer, max_length=50, temperature=0.7):
    """Autoregressively generate text continuing a prompt.

    Args:
        prompt: Seed text; cleaned before generation.
        model: Keras next-word model producing a probability
            distribution over the tokenizer's vocabulary.
        tokenizer: Fitted Keras tokenizer for the textgen model.
        max_length: Maximum number of words to append to the prompt.
        temperature: Softmax temperature; higher values flatten the
            distribution and make sampling more random.

    Returns:
        The cleaned prompt followed by the generated words.
    """
    input_text = clean_text(prompt)
    # Build the inverse index -> word map ONCE. The original scanned
    # tokenizer.word_index linearly for every generated token, making
    # generation O(max_length * vocabulary_size).
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    for _ in range(max_length):
        token_list = tokenizer.texts_to_sequences([input_text])[0]
        token_list = pad_sequences([token_list], maxlen=255, padding='pre')
        predicted = model.predict(token_list, verbose=0)[0]
        # Apply temperature in log space, then renormalize so the
        # values form a valid probability distribution for sampling.
        predicted = np.log(predicted) / temperature
        exp_preds = np.exp(predicted)
        predicted = exp_preds / np.sum(exp_preds)
        # Sample the next token index from the scaled distribution.
        predicted_index = np.random.choice(len(predicted), p=predicted)
        # Index 0 is the padding slot and has no entry in word_index;
        # stop generating if the sampled index maps to no word.
        output_word = index_to_word.get(predicted_index, "")
        if output_word == "":
            break
        input_text += " " + output_word
    return input_text
# Print environment info for debugging
# (useful in hosted logs to diagnose TF/Keras version mismatches when
# deserializing the saved .keras model files).
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tf.keras.__version__}")
print(f"Current directory contents: {os.listdir('.')}")

# Load models with error handling.
# NOTE: this runs at import time — the app cannot start unless the model
# and tokenizer files are present in the working directory.
try:
    print("Starting model loading process...")
    classifier_model, classifier_tokenizer, textgen_model, textgen_tokenizer = load_models()
    print("Models and tokenizers loaded successfully")
except Exception as e:
    print(f"Error in model loading process: {e}")
    # Re-raise so startup fails fast instead of serving a broken UI.
    raise
# Create Gradio interface functions
def classify_interface(text):
    """Gradio handler for the classifier tab.

    Runs the classifier on the input text and returns one formatted
    result line for the output textbox.
    """
    result = classify_text(text, classifier_model, classifier_tokenizer)
    subject, confidence = result
    return f"Subject: {subject} (Confidence: {confidence:.2f}%)"
def generate_interface(prompt, length=50, temp=0.7):
    """Gradio handler for the text-generation tab.

    Coerces the slider values to the types generate_text expects and
    delegates to the global textgen model and tokenizer.
    """
    max_len = int(length)
    temperature = float(temp)
    return generate_text(
        prompt,
        textgen_model,
        textgen_tokenizer,
        max_length=max_len,
        temperature=temperature,
    )
# Define example inputs for the classifier.
# Two examples per class (Science, Maths, History) so each subject is
# represented in the Examples widget.
classifier_examples = [
    ["The process of photosynthesis converts light energy into chemical energy, producing oxygen as a byproduct."],
    ["The Pythagorean theorem states that in a right-angled triangle, the square of the length of the hypotenuse equals the sum of squares of the other two sides."],
    ["The Industrial Revolution began in Great Britain in the late 18th century and spread to other parts of Europe and North America."],
    ["Atoms consist of a nucleus containing protons and neutrons, surrounded by electrons that orbit in energy levels."],
    ["Differential equations are mathematical equations that relate a function with its derivatives, representing rates of change."],
    ["The Treaty of Versailles was signed in 1919, officially ending World War I and imposing harsh penalties on Germany."]
]

# Define example inputs for the text generator.
# Each row is [prompt, max_length, temperature], matching the order of
# inputs wired to gr.Examples below.
generator_examples = [
    ["Newton's laws of motion explain", 30, 0.8],
    ["Climate change affects ecosystems by", 20, 0.7],
    ["Quantum mechanics revolutionized physics when", 20, 0.9],
    ["Chemical reactions occur when", 25, 0.6]
]
# Create Gradio interface: two tabs (classification, generation) plus a
# shared "about" section, then launch the app.
with gr.Blocks(title="Science Text Analyzer") as demo:
    gr.Markdown("# Science Text Analyzer")

    with gr.Tab("Classify Text"):
        gr.Markdown("### Classify Academic Text")
        gr.Markdown(
            "The **Science Text Analyzer** uses an **LSTM-based text classification model** trained on curated academic datasets sourced from **Hugging Face**. "
            "It predicts whether input text belongs to **Science**, **Mathematics**, or **History**, leveraging **sequential context** and **language structure** for accurate subject classification."
        )
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Enter Text", lines=5, placeholder="Paste a sentence or paragraph here...")
                classify_button = gr.Button("Classify")
            with gr.Column():
                output = gr.Textbox(label="Classification Result", placeholder="The predicted subject and confidence will appear here.")
        # Add examples for the classifier.
        # NOTE(review): cache_examples=True runs classify_interface on
        # every example at startup to pre-compute outputs.
        gr.Examples(
            examples=classifier_examples,
            inputs=text_input,
            outputs=output,
            fn=classify_interface,
            cache_examples=True
        )
        classify_button.click(fn=classify_interface, inputs=text_input, outputs=output)

    with gr.Tab("Generate Text"):
        gr.Markdown("### Generate Academic Text")
        gr.Markdown(
            "Use this tool to generate educational text based on a given prompt. "
            "You can control the output length and creativity using the sliders below. "
            "**Note:** Longer text lengths will take more time to generate, so please be patient when requesting extensive outputs."
        )
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(label="Enter a Prompt", lines=3, placeholder="Type an introductory sentence or concept...")
                length_slider = gr.Slider(minimum=10, maximum=60, value=50, step=10, label="Maximum Length (words)")
                temp_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature (Creativity Level)")
                generate_button = gr.Button("Generate")
            with gr.Column():
                generated_output = gr.Textbox(label="Generated Text", lines=8, placeholder="The generated text will appear here.")
        # Add examples for the text generator (also cached at startup,
        # which triggers model inference per example row).
        gr.Examples(
            examples=generator_examples,
            inputs=[prompt_input, length_slider, temp_slider],
            outputs=generated_output,
            fn=generate_interface,
            cache_examples=True
        )
        generate_button.click(fn=generate_interface, inputs=[prompt_input, length_slider, temp_slider], outputs=generated_output)

    gr.Markdown("### About This App")
    gr.Markdown(
        "The **Science Text Analyzer** uses deep learning models trained on curated academic datasets to classify and generate content "
        "related to academic disciplines. The **classifier** categorizes input text into one of three subjects: **Science**, **Mathematics**, or **History**. "
        "The **text generator** produces coherent scientific passages, especially focused on **Physics**, **Chemistry**, and **Biology**, making it a valuable tool "
        "for **educational research**, **content creation**, and **curriculum support**."
    )

# Launch the app
demo.launch()
|