bushra1dajam committed on
Commit
afaca84
·
verified ·
1 Parent(s): 672d32b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -0
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import random
3
+ import openai
4
+ import joblib
5
+ import re
6
+ from nltk.corpus import stopwords
7
+ from nltk.tokenize import word_tokenize
8
+ import nltk
9
+
10
import os

# Fetch the NLTK resources used below: the stop-word lists and the 'punkt'
# tokenizer models needed by word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')

StopWords = set(stopwords.words('arabic'))

# The OpenAI API key is read from the OPENAI_API_KEY environment variable
# (e.g. a deployment secret) instead of being hard-coded in the source.
# SECURITY: the key that used to be written on this line was published with
# the commit and must be treated as compromised -- revoke it and issue a new one.
openai.api_key = os.environ.get("OPENAI_API_KEY")


# Load the pipeline
# NOTE: joblib.load unpickles the file, so 'model_pipeline.joblib' must come
# from a trusted source.
pipeLine = joblib.load('model_pipeline.joblib')
20
+
21
+ # Text Preprocessor Class
22
class TextPreprocessor:
    """Normalize raw text before it is passed to the classification pipeline.

    The instance holds the Arabic stop-word set and two compiled patterns:
    one matching Arabic diacritics (plus the tatweel stretch character) and
    one matching several emoji code-point ranges.
    """

    def __init__(self):
        # Stop words come from NLTK's Arabic list.
        self.StopWords = set(stopwords.words('arabic'))
        # Verbose-mode pattern: whitespace and '#' comments inside it are ignored.
        self.ArabicDiacritics = re.compile(r"""
            ّ    | # Tashdid
            َ    | # Fatha
            ً    | # Tanwin Fath
            ُ    | # Damma
            ٌ    | # Tanwin Damm
            ِ    | # Kasra
            ٍ    | # Tanwin Kasr
            ْ    | # Sukun
            ـ     # Tatwil/Kashida
        """, re.VERBOSE)
        self.RegrexPattern = re.compile(
            r"[\U0001F600-\U0001F64F"  # emoticons {😀 , 😆}
            r"\U0001F300-\U0001F5FF"   # symbols & pictographs {🌍 , 🌞}
            r"\U0001F680-\U0001F6FF"   # transport & map symbols {🚌 , 🚕 }
            r"\U0001F1E0-\U0001F1FF]",  # flags (iOS) { 🇺🇸 , 🇨🇦 }
            re.UNICODE,
        )

    def preprocess_text(self, text):
        """Return *text* cleaned, tokenized, stop-word filtered and re-joined.

        Steps, in order: drop every character that is neither a word
        character nor whitespace, drop emoji, drop Arabic diacritics,
        tokenize with ``word_tokenize``, remove tokens found in
        ``self.StopWords``, and join the remaining tokens with single spaces.
        """
        # Special characters {& $ @} and punctuation {. , ? !}
        cleaned = re.sub(r'[^\w\s]', '', text)
        # Emoji characters
        cleaned = self.RegrexPattern.sub('', cleaned)
        # Arabic diacritics
        cleaned = self.ArabicDiacritics.sub('', cleaned)

        kept_tokens = (
            token for token in word_tokenize(cleaned)
            if token not in self.StopWords
        )
        return ' '.join(kept_tokens)


# Shared preprocessor instance used by classify_article.
preprocessor = TextPreprocessor()
58
+
59
# Maps the integer label predicted by the pipeline to a display name.
# The position of each name in the tuple is its label (0 through 6).
category_mapping = dict(enumerate((
    'Culture',
    'Finance',
    'Medical',
    'Politics',
    'Religion',
    'Sports',
    'Tech',
)))
68
+
69
def classify_article(article_text, pipeline):
    """Return the category name predicted for a single article.

    The text is cleaned with the module-level ``preprocessor`` and passed to
    ``pipeline.predict`` as a one-element list. The first prediction is then
    looked up in ``category_mapping``; a label with no entry yields
    ``"Unknown"``.
    """
    cleaned = preprocessor.preprocess_text(article_text)
    predictions = pipeline.predict([cleaned])
    label = predictions[0]
    return category_mapping.get(label, "Unknown")
74
+
75
def classification_page():
    """Render the classification page.

    Shows a text area and a "Classify" button. When the button is pressed
    with non-blank text, the text is classified with the module-level
    ``pipeLine`` and the predicted category is displayed; otherwise a
    warning asks the user for input.
    """
    st.title("Text Classification")
    st.write("Enter text below to classify it into categories like Culture, Finance, Medical, Politics, Religion, Sports, and Tech.")

    # Text input from user
    input_text = st.text_area("Text Input", height=200)

    # Button to trigger classification
    if not st.button("Classify"):
        return

    # Whitespace-only input carries nothing to classify, so it is treated
    # the same as an empty box instead of being sent to the model.
    if not input_text.strip():
        st.warning("Please enter some text to classify.")
        return

    with st.spinner("Classifying..."):
        category = classify_article(input_text, pipeLine)
    st.write("### Predicted Category")
    st.write(category)
91
+
92
+ # Function to generate summary using OpenAI
93
def generate_summary(text):
    """Summarize *text* with the OpenAI chat completions API.

    Supports both generations of the ``openai`` package: releases from 1.0
    on expose ``openai.chat.completions.create`` (the legacy
    ``openai.ChatCompletion`` interface was removed there), while older
    releases only provide ``openai.ChatCompletion.create``.

    Args:
        text: The text to summarize; sent as the user message.

    Returns:
        The content of the first returned choice with surrounding whitespace
        stripped, or an empty string when the response carries no content.

    Raises:
        Any exception raised by the ``openai`` client (authentication,
        network, rate limiting, ...) propagates to the caller.
    """
    request = {
        "model": "gpt-3.5-turbo",  # Default model
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that summarizes text."},
            {"role": "user", "content": text},
        ],
        "temperature": 0.7,  # Default temperature
        "max_tokens": 150,  # Default max tokens
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }

    if hasattr(openai, "chat"):
        # openai >= 1.0: module-level client, attribute-style response objects.
        response = openai.chat.completions.create(**request)
        content = response.choices[0].message.content
    else:
        # openai < 1.0: legacy interface, dict-style message access.
        response = openai.ChatCompletion.create(**request)
        content = response.choices[0].message['content']

    # The content may be None; avoid calling .strip() on it.
    return (content or "").strip()
107
+
108
+ # Function for the summarization page
109
def summarization_page():
    """Render the summarization page.

    Shows a text area and a "Summarize" button. When the button is pressed
    with non-blank text, ``generate_summary`` is called and its result is
    displayed; otherwise a warning asks the user for input.
    """
    st.title("Text Summarization with OpenAI GPT")
    st.write("Enter text below and click 'Summarize' to generate a summary.")

    # Text input from user
    input_text = st.text_area("Text Input", height=200)

    # Button to trigger summarization
    if not st.button("Summarize"):
        return

    # Whitespace-only input would still trigger an API request; treat it
    # the same as an empty box.
    if not input_text.strip():
        st.warning("Please enter some text to summarize.")
        return

    with st.spinner("Generating summary..."):
        summary = generate_summary(input_text)
    st.write("### Summary")
    st.write(summary)
125
+
126
def generate_questions(user_text):
    """Return the quiz questions as a list of dicts.

    Each dict has the keys ``"question"``, ``"options"`` (a list of four
    choices) and ``"answer"`` (the correct choice). ``user_text`` is accepted
    but not used: the same fixed set of three questions is returned on
    every call.
    """
    # One (question, options, answer) triple per quiz item.
    bank = (
        (
            "ما هو اسم الكتاب الذي حصل على جائزة عربية على هامش افتتاح معرض بيروت العربي الدولي للكتاب ؟",
            ["الحضارة الاسلامية", "المصحف وقراءاته", "مؤمنون بلا حدود", "عبد المجيد الشرقي"],
            "المصحف وقراءاته",
        ),
        (
            "من الذي حصل أشرف على تصنيف كتاب المصحف وقراءاته ؟",
            ["عبد المجيد الشرقي", "النادي الثقافي العربي", "مؤسسة مؤمنون بلا حدود", "مجموعة من الباحثين"],
            "عبد المجيد الشرقي",
        ),
        (
            "كم عدد مجلدات كتاب المصحف وقراءاته ؟",
            ["ثلاثة مجلدات", "أربعة مجلدات", "خمسة مجلدات", "ستة مجلدات"],
            "خمسة مجلدات",
        ),
    )
    return [
        {"question": prompt, "options": choices, "answer": correct}
        for prompt, choices, correct in bank
    ]
145
+
146
def quiz_page():
    """Render the quiz page: generate questions, ask them one by one, score answers.

    Quiz progress is kept in ``st.session_state`` under the keys
    ``questions``, ``current_question``, ``score`` and ``asked_questions``
    so that it persists across Streamlit's script re-runs.
    """
    st.title("Simple Quiz")

    user_text = st.text_area("Enter your text here:", height=150)

    if st.button("Generate Questions"):
        if user_text.strip():
            # Start a fresh quiz: new question set, nothing asked, zero score.
            st.session_state.questions = generate_questions(user_text)
            st.session_state.current_question = None
            st.session_state.score = 0
            st.session_state.asked_questions = []
        else:
            # Tell the user why nothing happened, like the other pages do.
            st.warning("Please enter some text to generate questions.")

    # Nothing more to show until a quiz with at least one question exists.
    if not st.session_state.get("questions"):
        return

    # Make sure every progress key exists before it is read below.
    for key, default in (("current_question", None), ("score", 0), ("asked_questions", [])):
        if key not in st.session_state:
            st.session_state[key] = default

    if st.button("Ask a Question"):
        remaining = [
            q for q in st.session_state.questions
            if q not in st.session_state.asked_questions
        ]
        if remaining:
            st.session_state.current_question = random.choice(remaining)
            st.session_state.asked_questions.append(st.session_state.current_question)
        else:
            st.write("All questions have been asked!")

    question = st.session_state.current_question
    if question:
        st.write(f"Question: {question['question']}")
        user_answer = st.radio("Choose your answer:", question['options'], key="answer")

        if st.button("Submit Answer"):
            if user_answer == question['answer']:
                st.session_state.score += 1
            # Clearing the current question prevents scoring it twice.
            st.session_state.current_question = None

    if st.button("Finish Quiz"):
        st.write(f"Your final score is {st.session_state.score} out of {len(st.session_state.asked_questions)}")
        # Reset all quiz progress so a new quiz starts clean.
        st.session_state.score = 0
        st.session_state.asked_questions = []
        st.session_state.questions = []
        st.session_state.current_question = None
183
+
184
+ # Add navigation
185
# Sidebar page selector; the chosen label decides which page function runs.
page = st.sidebar.selectbox("Choose a page", ["Classification", "Summarization", "Quiz"])

# Any label other than the two listed here falls through to the quiz page.
render_page = {
    "Classification": classification_page,
    "Summarization": summarization_page,
}.get(page, quiz_page)
render_page()