Spaces:

Mohamed-Maher
/

Hadith_Classification

Sleeping

App Files Community

Mohamed-Maher committed on Jun 15, 2024

Commit

35cfe27

verified ·

1 Parent(s): cdcb7c7

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -58

app.py CHANGED Viewed

@@ -7,61 +7,68 @@ import nltk
 import gradio as gr
 from sklearn.metrics.pairwise import cosine_similarity
-class HadithClassificationApp:
-    def __init__(self):
-        # Download NLTK resources if needed
-        nltk.download('punkt')
-        # Load the dataset and labels
-        self.dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
-        self.labels = self.dataset['Arabic_Grade']
-        # Load the models
-        with open("tfidf_vectorizer.pkl", "rb") as f:
-            self.vectorizer = pickle.load(f)
-        with open("cosine_similarity_model.pkl", "rb") as f:
-            self.X = pickle.load(f)
-    @staticmethod
-    def remove_tashkeel(text):
-        tashkeel_pattern = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
-        return re.sub(tashkeel_pattern, '', text)
-    def preprocess_arabic_text(self, text):
-        text = self.remove_tashkeel(text)
-        tokens = nltk.word_tokenize(text)
-        cleaned_tokens = [token for token in tokens if token.isalnum()]
-        lowercase_tokens = [token.lower() for token in cleaned_tokens]
-        return " ".join(lowercase_tokens)
-    def predict_label(self, input_text, threshold=0.5):
-        input_text = self.preprocess_arabic_text(input_text)
-        input_vector = self.vectorizer.transform([input_text])
-        similarities = cosine_similarity(input_vector, self.X).flatten()
-        max_index = np.argmax(similarities)
-        max_similarity = similarities[max_index]
-        if max_similarity >= threshold:
-            return self.labels.iloc[max_index]
-        else:
-            return "No similar text found in dataset"
-    def classify_hadith(self, input_text):
-        return self.predict_label(input_text)
-if __name__ == "__main__":
-    # Initialize the app
-    hadith_classification_app = HadithClassificationApp()
-    # Set up the Gradio interface
-    iface = gr.Interface(
-        fn=hadith_classification_app.classify_hadith,
-        inputs="text",
-        outputs="text",
-        title="Hadith Classification App",
-        description="Classify Hadith text based on pre-trained model."
-    )
-    # Launch the Gradio interface
-    iface.launch()

 import gradio as gr
 from sklearn.metrics.pairwise import cosine_similarity
+import os
+current_path = os.getcwd()
+print(f"The current working directory is: {current_path}")
+# class HadithClassificationApp:
+#     def __init__(self):
+#         # Download NLTK resources if needed
+#         nltk.download('punkt')
+#         # Load the dataset and labels
+#         self.dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
+#         self.labels = self.dataset['Arabic_Grade']
+#         # Load the models
+#         with open("tfidf_vectorizer.pkl", "rb") as f:
+#             self.vectorizer = pickle.load(f)
+#         with open("cosine_similarity_model.pkl", "rb") as f:
+#             self.X = pickle.load(f)
+#     @staticmethod
+#     def remove_tashkeel(text):
+#         tashkeel_pattern = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
+#         return re.sub(tashkeel_pattern, '', text)
+#     def preprocess_arabic_text(self, text):
+#         text = self.remove_tashkeel(text)
+#         tokens = nltk.word_tokenize(text)
+#         cleaned_tokens = [token for token in tokens if token.isalnum()]
+#         lowercase_tokens = [token.lower() for token in cleaned_tokens]
+#         return " ".join(lowercase_tokens)
+#     def predict_label(self, input_text, threshold=0.5):
+#         input_text = self.preprocess_arabic_text(input_text)
+#         input_vector = self.vectorizer.transform([input_text])
+#         similarities = cosine_similarity(input_vector, self.X).flatten()
+#         max_index = np.argmax(similarities)
+#         max_similarity = similarities[max_index]
+#         if max_similarity >= threshold:
+#             return self.labels.iloc[max_index]
+#         else:
+#             return "No similar text found in dataset"
+#     def classify_hadith(self, input_text):
+#         return self.predict_label(input_text)
+# if __name__ == "__main__":
+#     # Initialize the app
+#     hadith_classification_app = HadithClassificationApp()
+#     # Set up the Gradio interface
+#     iface = gr.Interface(
+#         fn=hadith_classification_app.classify_hadith,
+#         inputs="text",
+#         outputs="text",
+#         title="Hadith Classification App",
+#         description="Classify Hadith text based on pre-trained model."
+#     )
+#     # Launch the Gradio interface
+#     iface.launch()