Spaces:

Sazzz02
/

Smart-Doc-Solutions

Sleeping

App Files Files Community

Sazzz02 committed on Aug 10, 2025

Commit

46d4b20

verified ·

1 Parent(s): 42456ae

Create app.py

Browse files

Files changed (1) hide show

app.py +81 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+# app.py
+import gradio as gr
+import joblib
+import re
+import nltk
+from nltk.corpus import stopwords
+import string
+# Download NLTK stopwords if not already present
+try:
+    stopwords.words('english')
+except LookupError:
+    nltk.download('stopwords')
+# Define global variables for the model, vectorizer, and stopwords
+MODEL_PATH = "random_forest_model.joblib"
+VECTORIZER_PATH = "tfidf_vectorizer.joblib"
+STOP_WORDS = set(stopwords.words('english'))
+# Load the trained model and vectorizer
+try:
+    model = joblib.load(MODEL_PATH)
+    tfidf_vectorizer = joblib.load(VECTORIZER_PATH)
+except FileNotFoundError:
+    raise FileNotFoundError(
+        "Model or vectorizer files not found. "
+        "Please ensure 'random_forest_model.joblib' and 'tfidf_vectorizer.joblib' "
+        "are in the same directory as this script."
+    )
+def preprocess_text(text):
+    """
+    Cleans and preprocesses text data to match the format used during training.
+    """
+    # Convert to lowercase
+    text = text.lower()
+    # Remove punctuation
+    text = text.translate(str.maketrans('', '', string.punctuation))
+    # Remove digits
+    text = re.sub(r'\d+', '', text)
+    # Remove stopwords
+    text = ' '.join([word for word in text.split() if word not in STOP_WORDS])
+    return text
+def predict_class(input_text):
+    """
+    Takes raw text input, preprocesses it, and returns the predicted class.
+    """
+    # Preprocess the input text
+    preprocessed_text = preprocess_text(input_text)
+    # Use the TF-IDF vectorizer to transform the text
+    text_vector = tfidf_vectorizer.transform([preprocessed_text])
+    # Get the model's prediction
+    prediction = model.predict(text_vector)
+    # Return the predicted class name
+    return prediction[0]
+# Sample inputs for the Gradio app: three plain-text documents, one string
+# per entry, handed to gr.Interface through its `examples` argument
+example_inputs = [
+    "The company's annual financial report showed a net profit of 50 million dollars, an increase of 15% from the previous year. The key drivers were cost reduction and increased market share in Asia.",
+    "Patient medical history reveals a family history of hypertension. Symptoms include elevated blood pressure readings and persistent headaches. The patient has been prescribed a new medication.",
+    "Instructions for assembly: Attach part A to part B using the supplied screw. Ensure the connection is tight to prevent detachment. The product is intended for indoor use only."
+]
+# Set up the Gradio interface with examples
+interface = gr.Interface(
+    fn=predict_class,
+    inputs=gr.Textbox(lines=10, placeholder="Paste your document text here...", label="Input Document Text"),
+    outputs=gr.Textbox(label="Predicted Document Class"),
+    title="Document Classification App",
+    description="This app classifies an input document text into one of five predefined categories.",
+    examples=example_inputs
+)
+# Launch the app
+if __name__ == "__main__":
+    interface.launch(inline=False, share=True)