Spaces:

Mohamed-Maher
/

Hadith_Classification

Sleeping

App Files Community

Mohamed-Maher committed on Jun 15, 2024

Commit

39ccf9b

verified ·

1 Parent(s): 18feff1

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -33

app.py CHANGED Viewed

@@ -1,48 +1,67 @@
 import re
-import nltk
 import pickle
 import numpy as np
 import pandas as pd
-import streamlit as st
-from datasets import load_dataset
 from sklearn.metrics.pairwise import cosine_similarity
-nltk.download('punkt')
-dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
-labels = dataset['Arabic_Grade']
-# Helper functions
-def remove_tashkeel(text):
-    tashkeel_pattern = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
-    return re.sub(tashkeel_pattern, '', text)
-def preprocess_arabic_text(text):
-    text = remove_tashkeel(text)
-    tokens = nltk.word_tokenize(text)
-    cleaned_tokens = [token for token in tokens if token.isalnum()]
-    lowercase_tokens = [token.lower() for token in cleaned_tokens]
-    return " ".join(lowercase_tokens)
-# Function to predict label
-def predict_label(input_text, threshold=0.5):
-    with open("tfidf_vectorizer.pkl", "rb") as f:
-        vectorizer = pickle.load(f)
-    with open("cosine_similarity_model.pkl", "rb") as f:
-        X = pickle.load(f)
-    input_text = preprocess_arabic_text(input_text)
-    input_vector = vectorizer.transform([input_text])
-    similarities = cosine_similarity(input_vector, X).flatten()
-    max_index = np.argmax(similarities)
-    max_similarity = similarities[max_index]
-    if max_similarity >= threshold:
-        return labels.iloc[max_index]
-    else:
-        return "No similar text found in dataset"
-x = st.slider('Enter Hadith')
-st.write(x, 'Hadith Classification', predict_label)

+import os
 import re
 import pickle
 import numpy as np
 import pandas as pd
+import nltk
+import gradio as gr
 from sklearn.metrics.pairwise import cosine_similarity
+class HadithClassificationApp:
+    def __init__(self):
+        # Download NLTK resources if needed
+        nltk.download('punkt')
+        # Load the dataset and labels
+        self.dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
+        self.labels = self.dataset['Arabic_Grade']
+        # Load the models
+        with open("tfidf_vectorizer.pkl", "rb") as f:
+            self.vectorizer = pickle.load(f)
+        with open("cosine_similarity_model.pkl", "rb") as f:
+            self.X = pickle.load(f)
+    @staticmethod
+    def remove_tashkeel(text):
+        tashkeel_pattern = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
+        return re.sub(tashkeel_pattern, '', text)
+    def preprocess_arabic_text(self, text):
+        text = self.remove_tashkeel(text)
+        tokens = nltk.word_tokenize(text)
+        cleaned_tokens = [token for token in tokens if token.isalnum()]
+        lowercase_tokens = [token.lower() for token in cleaned_tokens]
+        return " ".join(lowercase_tokens)
+    def predict_label(self, input_text, threshold=0.5):
+        input_text = self.preprocess_arabic_text(input_text)
+        input_vector = self.vectorizer.transform([input_text])
+        similarities = cosine_similarity(input_vector, self.X).flatten()
+        max_index = np.argmax(similarities)
+        max_similarity = similarities[max_index]
+        if max_similarity >= threshold:
+            return self.labels.iloc[max_index]
+        else:
+            return "No similar text found in dataset"
+    def classify_hadith(self, input_text):
+        return self.predict_label(input_text)
+if __name__ == "__main__":
+    # Initialize the app
+    hadith_classification_app = HadithClassificationApp()
+    # Set up the Gradio interface
+    iface = gr.Interface(
+        fn=hadith_classification_app.classify_hadith,
+        inputs="text",
+        outputs="text",
+        title="Hadith Classification App",
+        description="Classify Hadith text based on pre-trained model."
+    )
+    # Launch the Gradio interface
+    iface.launch()