Spaces:

Pearll12
/

bbc-document-classifier

Sleeping

File size: 1,629 Bytes

import joblib
import numpy as np
import gradio as gr


# Location of the serialized classifier. The path is relative, so it is
# resolved against the working directory of the running process.
MODEL_PATH = "models/best_model.pkl"

# Loaded once at import time so every classification request reuses the same
# object; classify_document() calls predict() and decision_function() on it.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only point MODEL_PATH at a trusted artifact.
model = joblib.load(MODEL_PATH)


def softmax(scores, axis=None):
    """Turn raw scores into values that are non-negative and sum to 1.

    The maximum score is subtracted before exponentiating so that large
    scores cannot overflow ``np.exp``; the shift cancels out in the ratio
    and does not change the result.

    Args:
        scores: Array-like of numeric scores.
        axis: Axis along which to normalise. ``None`` (the default)
            normalises over every element at once, whatever the shape.
            Pass e.g. ``axis=1`` to normalise each row of a 2-D batch
            independently.

    Returns:
        Values with the same shape as ``scores`` that sum to 1 along
        ``axis`` (over all elements when ``axis`` is ``None``). An empty
        input is returned as an empty array instead of raising.
    """
    scores = np.array(scores)
    if scores.size == 0:
        # np.max has no identity for zero-size input and would raise
        # ValueError; the softmax of nothing is simply nothing.
        return scores
    # keepdims=True keeps the reduced axis so the result broadcasts back
    # against the original shape for any choice of axis.
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=axis, keepdims=True)


def classify_document(text):
    """Classify one document and report a confidence percentage.

    Args:
        text: The document text typed or pasted by the user.

    Returns:
        A ``(label, confidence)`` pair. ``label`` is whatever the loaded
        model's ``predict`` returns for the text, and ``confidence`` is the
        largest softmax-normalised decision score expressed as a percentage
        rounded to two decimals. Because it is derived from raw decision
        scores it is a relative indicator, not a calibrated probability.
        If the input is missing or has fewer than 5 characters once
        surrounding whitespace is removed, a prompt message and ``0.0`` are
        returned instead and the model is not called.
    """
    stripped = text.strip() if text else ""
    if len(stripped) < 5:
        return "Please enter at least 5 characters.", 0.0

    # The model is called on a one-element batch, so take item 0 of each
    # result. The original (unstripped) text is what gets classified.
    batch = [text]
    label = model.predict(batch)[0]
    raw_scores = model.decision_function(batch)[0]

    top_probability = float(np.max(softmax(raw_scores)))
    confidence_pct = top_probability * 100

    return label, round(confidence_pct, 2)


# Gradio UI definition: one multi-line text box in; the predicted category
# and a confidence percentage out, both produced by classify_document().
demo = gr.Interface(
    title="BBC News Document Classifier",
    description=(
        "Classifies document text into one of five categories: "
        "business, entertainment, politics, sport, or tech."
    ),
    fn=classify_document,
    inputs=gr.Textbox(
        label="Input Document Text",
        placeholder="Paste news/document text here...",
        lines=8,
    ),
    # Order matches the (label, confidence) tuple returned by fn.
    outputs=[
        gr.Textbox(label="Predicted Category"),
        gr.Number(label="Confidence Score (%)"),
    ],
    # Each example is a one-element row because the interface has a single
    # input component.
    examples=[
        [sample]
        for sample in (
            "The football team won the final match after scoring two goals.",
            "The company reported strong profits and growth in global markets.",
            "New software updates improve artificial intelligence performance.",
            "The government introduced a new policy during the parliamentary session.",
            "The actor received praise for her performance in the award-winning film.",
        )
    ],
)


# Start the Gradio server only when this file is run as a script; importing
# the module builds `demo` (and loads the model) without launching it.
if __name__ == "__main__":
    demo.launch()