Mohamed-Maher committed on
Commit
35cfe27
·
verified ·
1 Parent(s): cdcb7c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -58
app.py CHANGED
@@ -7,61 +7,68 @@ import nltk
7
  import gradio as gr
8
  from sklearn.metrics.pairwise import cosine_similarity
9
 
10
- class HadithClassificationApp:
11
- def __init__(self):
12
- # Download NLTK resources if needed
13
- nltk.download('punkt')
14
-
15
- # Load the dataset and labels
16
- self.dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
17
- self.labels = self.dataset['Arabic_Grade']
18
-
19
- # Load the models
20
- with open("tfidf_vectorizer.pkl", "rb") as f:
21
- self.vectorizer = pickle.load(f)
22
- with open("cosine_similarity_model.pkl", "rb") as f:
23
- self.X = pickle.load(f)
24
-
25
- @staticmethod
26
- def remove_tashkeel(text):
27
- tashkeel_pattern = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
28
- return re.sub(tashkeel_pattern, '', text)
29
-
30
- def preprocess_arabic_text(self, text):
31
- text = self.remove_tashkeel(text)
32
- tokens = nltk.word_tokenize(text)
33
- cleaned_tokens = [token for token in tokens if token.isalnum()]
34
- lowercase_tokens = [token.lower() for token in cleaned_tokens]
35
- return " ".join(lowercase_tokens)
36
-
37
- def predict_label(self, input_text, threshold=0.5):
38
- input_text = self.preprocess_arabic_text(input_text)
39
- input_vector = self.vectorizer.transform([input_text])
40
- similarities = cosine_similarity(input_vector, self.X).flatten()
41
-
42
- max_index = np.argmax(similarities)
43
- max_similarity = similarities[max_index]
44
-
45
- if max_similarity >= threshold:
46
- return self.labels.iloc[max_index]
47
- else:
48
- return "No similar text found in dataset"
49
-
50
- def classify_hadith(self, input_text):
51
- return self.predict_label(input_text)
52
-
53
- if __name__ == "__main__":
54
- # Initialize the app
55
- hadith_classification_app = HadithClassificationApp()
56
-
57
- # Set up the Gradio interface
58
- iface = gr.Interface(
59
- fn=hadith_classification_app.classify_hadith,
60
- inputs="text",
61
- outputs="text",
62
- title="Hadith Classification App",
63
- description="Classify Hadith text based on pre-trained model."
64
- )
65
-
66
- # Launch the Gradio interface
67
- iface.launch()
 
 
 
 
 
 
 
 
7
  import gradio as gr
8
  from sklearn.metrics.pairwise import cosine_similarity
9
 
10
+ import os
11
+
12
+ current_path = os.getcwd()
13
+
14
+ print(f"The current working directory is: {current_path}")
15
+
16
+
17
+ # class HadithClassificationApp:
18
+ # def __init__(self):
19
+ # # Download NLTK resources if needed
20
+ # nltk.download('punkt')
21
+
22
+ # # Load the dataset and labels
23
+ # self.dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
24
+ # self.labels = self.dataset['Arabic_Grade']
25
+
26
+ # # Load the models
27
+ # with open("tfidf_vectorizer.pkl", "rb") as f:
28
+ # self.vectorizer = pickle.load(f)
29
+ # with open("cosine_similarity_model.pkl", "rb") as f:
30
+ # self.X = pickle.load(f)
31
+
32
+ # @staticmethod
33
+ # def remove_tashkeel(text):
34
+ # tashkeel_pattern = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
35
+ # return re.sub(tashkeel_pattern, '', text)
36
+
37
+ # def preprocess_arabic_text(self, text):
38
+ # text = self.remove_tashkeel(text)
39
+ # tokens = nltk.word_tokenize(text)
40
+ # cleaned_tokens = [token for token in tokens if token.isalnum()]
41
+ # lowercase_tokens = [token.lower() for token in cleaned_tokens]
42
+ # return " ".join(lowercase_tokens)
43
+
44
+ # def predict_label(self, input_text, threshold=0.5):
45
+ # input_text = self.preprocess_arabic_text(input_text)
46
+ # input_vector = self.vectorizer.transform([input_text])
47
+ # similarities = cosine_similarity(input_vector, self.X).flatten()
48
+
49
+ # max_index = np.argmax(similarities)
50
+ # max_similarity = similarities[max_index]
51
+
52
+ # if max_similarity >= threshold:
53
+ # return self.labels.iloc[max_index]
54
+ # else:
55
+ # return "No similar text found in dataset"
56
+
57
+ # def classify_hadith(self, input_text):
58
+ # return self.predict_label(input_text)
59
+
60
+ # if __name__ == "__main__":
61
+ # # Initialize the app
62
+ # hadith_classification_app = HadithClassificationApp()
63
+
64
+ # # Set up the Gradio interface
65
+ # iface = gr.Interface(
66
+ # fn=hadith_classification_app.classify_hadith,
67
+ # inputs="text",
68
+ # outputs="text",
69
+ # title="Hadith Classification App",
70
+ # description="Classify Hadith text based on pre-trained model."
71
+ # )
72
+
73
+ # # Launch the Gradio interface
74
+ # iface.launch()