Spaces:

nnsohamnn
/

Classification.Text_Gen

Sleeping

App Files Community

nnsohamnn committed on May 9, 2025

Commit

633e441

verified ·

1 Parent(s): 6e6f523

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -81

app.py CHANGED Viewed

@@ -1,89 +1,118 @@
 import gradio as gr
-import numpy as np
 import tensorflow as tf
 import pickle
 from tensorflow.keras.preprocessing.sequence import pad_sequences
-# === Load Classifier ===
-classifier_model = tf.keras.models.load_model("classifier_model.keras")
-with open("classifier_tokenizer.pkl", "rb") as f:
-    classifier_tokenizer = pickle.load(f)
-# === Load Text Generator ===
-textgen_model = tf.keras.models.load_model("textgen_model.keras")
-with open("textgen_tokenizer.pkl", "rb") as f:
-    textgen_tokenizer = pickle.load(f)
-# === Settings ===
-CLASS_LABELS = ['Science', 'Math', 'History']  # Replace with actual labels
-MAX_LEN_CLASSIFIER = 100
-MAX_LEN_TEXTGEN = 50
-# === Classifier Inference ===
-def classify_text(text):
-    seq = classifier_tokenizer.texts_to_sequences([text])
-    padded = pad_sequences(seq, maxlen=MAX_LEN_CLASSIFIER)
-    preds = classifier_model.predict(padded, verbose=0)[0]
-    return {CLASS_LABELS[i]: float(preds[i]) for i in range(len(CLASS_LABELS))}
-# === Text Generation with Top-k Sampling ===
-def generate_text(seed_text, next_words=15, k=10, temperature=0.9):
-    recent_words = set()
-    for _ in range(next_words):
-        token_list = textgen_tokenizer.texts_to_sequences([seed_text])[0]
-        token_list = pad_sequences([token_list], maxlen=MAX_LEN_TEXTGEN-1, padding='pre')
-        predicted_probs = textgen_model.predict(token_list, verbose=0)[0]
-        scaled_probs = np.log(predicted_probs + 1e-10) / temperature
-        exp_probs = np.exp(scaled_probs)
-        normalized_probs = exp_probs / np.sum(exp_probs)
-        top_k_indices = np.argsort(normalized_probs)[-k:]
-        top_k_probs = normalized_probs[top_k_indices]
-        top_k_probs = top_k_probs / np.sum(top_k_probs)
-        predicted_word_index = np.random.choice(top_k_indices, p=top_k_probs)
-        # Avoid repetition
-        attempts = 0
-        while predicted_word_index in recent_words and attempts < 5:
-            predicted_word_index = np.random.choice(top_k_indices, p=top_k_probs)
-            attempts += 1
-        recent_words.add(predicted_word_index)
-        if len(recent_words) > 10:
-            recent_words.pop()
         output_word = ""
-        for word, index in textgen_tokenizer.word_index.items():
-            if index == predicted_word_index:
                 output_word = word
                 break
-        seed_text += " " + output_word
-    return seed_text
-# === Gradio UI ===
-with gr.Blocks() as demo:
-    gr.Markdown("## Dual Model: Text Classifier + Text Generator")
-    with gr.Tab("Text Classification"):
-        input_text = gr.Textbox(label="Enter Text", lines=4)
-        classify_btn = gr.Button("Classify")
-        output_label = gr.Label()
-        classify_btn.click(fn=classify_text, inputs=input_text, outputs=output_label)
-    with gr.Tab("Text Generation"):
-        seed_text = gr.Textbox(label="Seed Text", lines=4)
-        num_words = gr.Slider(5, 100, value=30, label="Words to Generate")
-        k_top = gr.Slider(1, 50, value=10, step=1, label="Top-k Sampling")
-        temp = gr.Slider(0.5, 1.5, value=0.9, label="Temperature")
-        generate_btn = gr.Button("Generate")
-        gen_output = gr.Textbox(label="Generated Text", lines=6)
-        generate_btn.click(fn=generate_text, inputs=[seed_text, num_words, k_top, temp], outputs=gen_output)
 demo.launch()

 import gradio as gr
 import tensorflow as tf
+import numpy as np
 import pickle
 from tensorflow.keras.preprocessing.sequence import pad_sequences
+import re
+# Load models and tokenizers
+def load_models():
+    """Load both Keras models and their pickled tokenizers.
+
+    All four files are read from the current working directory.
+
+    Returns:
+        tuple: ``(classifier_model, classifier_tokenizer, textgen_model,
+        textgen_tokenizer)`` in that order.
+    """
+    def _read_pickle(path):
+        # NOTE: pickle can execute arbitrary code on load; only use trusted files.
+        with open(path, 'rb') as handle:
+            return pickle.load(handle)
+    # Classifier: model first, then its tokenizer
+    clf_model = tf.keras.models.load_model('classifier_model.keras')
+    clf_tokenizer = _read_pickle('classifier_tokenizer.pkl')
+    # Text generator: model first, then its tokenizer
+    gen_model = tf.keras.models.load_model('textgen_model.keras')
+    gen_tokenizer = _read_pickle('textgen_tokenizer.pkl')
+    return clf_model, clf_tokenizer, gen_model, gen_tokenizer
+# Text cleaning function
+def clean_text(text):
+    """Normalise raw text before it is tokenised.
+
+    Steps, in order: drop every character that is not a word character,
+    whitespace or one of ``. , ! ?``; remove standalone integers; lowercase;
+    expand ``co2`` to ``carbon dioxide``; collapse runs of whitespace.
+
+    Args:
+        text: Input string.
+
+    Returns:
+        The cleaned, lowercased, single-spaced string.
+    """
+    text = re.sub(r'[^\w\s.,!?]', '', text)
+    text = re.sub(r'\b\d+\b', '', text)
+    # Lowercase BEFORE the expansion so 'CO2' / 'Co2' are matched as well;
+    # replacing first only ever caught an already-lowercase 'co2'.
+    # NOTE(review): if the tokenizers were fitted on text cleaned with the old
+    # ordering, 'co2' may be a vocabulary token - confirm against training code.
+    text = text.lower()
+    text = text.replace('co2', 'carbon dioxide')
+    return ' '.join(text.split())
+# Classification function
+def classify_text(text, model, tokenizer):
+    """Classify *text* and return the winning label with its confidence.
+
+    Args:
+        text: Raw input string; it is passed through ``clean_text`` first.
+        model: Object whose ``predict`` returns one score vector per input row.
+        tokenizer: Object providing ``texts_to_sequences``.
+
+    Returns:
+        tuple: ``(label, confidence)`` where ``label`` is one of 'Science',
+        'Maths', 'History' and ``confidence`` is the winning score times 100
+        as a built-in ``float``.
+    """
+    # NOTE(review): label order is assumed to match the model's output order - confirm.
+    classes = ['Science', 'Maths', 'History']
+    cleaned_text = clean_text(text)
+    sequence = tokenizer.texts_to_sequences([cleaned_text])
+    padded = pad_sequences(sequence, maxlen=255, padding='pre')
+    # verbose=0 matches generate_text and keeps the per-call progress bar out of the logs
+    prediction = model.predict(padded, verbose=0)[0]
+    # Get the highest probability class
+    class_idx = int(np.argmax(prediction))
+    # Convert the NumPy scalar so callers receive a plain Python float
+    confidence = float(prediction[class_idx]) * 100
+    return classes[class_idx], confidence
+# Text generation function
+def generate_text(prompt, model, tokenizer, max_length=50, temperature=0.7):
+    """Extend *prompt* word by word using temperature sampling.
+
+    Args:
+        prompt: Seed text; it is passed through ``clean_text`` first.
+        model: Object whose ``predict`` returns one probability vector per row.
+        tokenizer: Object providing ``texts_to_sequences`` and ``word_index``.
+        max_length: Maximum number of words to append.
+        temperature: Sampling temperature; must be greater than zero.
+
+    Returns:
+        The cleaned prompt followed by the generated words. Generation stops
+        early when the sampled index has no entry in ``tokenizer.word_index``.
+    """
+    cleaned_prompt = clean_text(prompt)
+    input_text = cleaned_prompt
+    for _ in range(max_length):
+        token_list = tokenizer.texts_to_sequences([input_text])[0]
+        token_list = pad_sequences([token_list], maxlen=255, padding='pre')
+        predicted = model.predict(token_list, verbose=0)[0]
+        # Apply temperature in float64 log space. Zero probabilities map to -inf
+        # (warning silenced) and stay at exactly zero after exponentiation.
+        with np.errstate(divide='ignore'):
+            scaled = np.log(np.asarray(predicted, dtype=np.float64)) / temperature
+        # Subtracting the maximum keeps exp() from underflowing to all zeros at
+        # low temperatures, which would yield NaN probabilities and make
+        # np.random.choice raise.
+        exp_preds = np.exp(scaled - np.max(scaled))
+        predicted = exp_preds / np.sum(exp_preds)
+        # Sample from the distribution
+        predicted_index = np.random.choice(len(predicted), p=predicted)
         output_word = ""
+        for word, index in tokenizer.word_index.items():
+            if index == predicted_index:
                 output_word = word
                 break
+        # No word for this index: stop generating
+        if output_word == "":
+            break
+        input_text += " " + output_word
+    return input_text
+# Load both models and tokenizers once at import time; the interface callbacks below read these globals
+classifier_model, classifier_tokenizer, textgen_model, textgen_tokenizer = load_models()
+# Create Gradio interface
+def classify_interface(text):
+    """Gradio callback: classify *text* and format the outcome as one display line."""
+    result = classify_text(text, classifier_model, classifier_tokenizer)
+    subject, confidence = result
+    message = f"Subject: {subject} (Confidence: {confidence:.2f}%)"
+    return message
+def generate_interface(prompt, length=50, temp=0.7):
+    """Gradio callback: generate text from *prompt* using the text-generation model."""
+    # Coerce the UI values to the types generate_text works with
+    word_budget = int(length)
+    sampling_temperature = float(temp)
+    return generate_text(
+        prompt,
+        textgen_model,
+        textgen_tokenizer,
+        max_length=word_budget,
+        temperature=sampling_temperature,
+    )
+# Create tabs for different functionalities
+with gr.Blocks(title="Science Text Analyzer") as demo:
+    gr.Markdown("# Science Text Analyzer")
+    # Tab 1: single text box in, formatted classification line out
+    with gr.Tab("Classify Text"):
+        # Description lists the labels classify_text can actually return
+        gr.Markdown("## Classify text as Science, Maths, or History")
+        with gr.Row():
+            with gr.Column():
+                text_input = gr.Textbox(label="Enter scientific text", lines=5)
+                classify_button = gr.Button("Classify")
+            with gr.Column():
+                output = gr.Textbox(label="Classification Result")
+        classify_button.click(fn=classify_interface, inputs=text_input, outputs=output)
+    # Tab 2: prompt plus length/temperature sliders in, generated text out
+    with gr.Tab("Generate Text"):
+        gr.Markdown("## Generate scientific text based on a prompt")
+        with gr.Row():
+            with gr.Column():
+                prompt_input = gr.Textbox(label="Enter a prompt", lines=3)
+                length_slider = gr.Slider(minimum=10, maximum=200, value=50, step=10, label="Maximum Length")
+                temp_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature (Creativity)")
+                generate_button = gr.Button("Generate")
+            with gr.Column():
+                generated_output = gr.Textbox(label="Generated Text", lines=8)
+        # Input order must match generate_interface(prompt, length, temp)
+        generate_button.click(fn=generate_interface, inputs=[prompt_input, length_slider, temp_slider], outputs=generated_output)
+    gr.Markdown("### About")
+    gr.Markdown("This app uses deep learning models to classify text as Science, Maths, or History and to generate text from a prompt.")
+# Launch the app
 demo.launch()