Spaces:
Runtime error
Upload app.py
app.py
ADDED
@@ -0,0 +1,199 @@
import streamlit as st
from GoogleNews import GoogleNews

import pandas as pd
import numpy as np
import spacy
import gensim
import string
import re

import sklearn
from sklearn.metrics import classification_report, recall_score, precision_score, accuracy_score, f1_score
from sklearn.metrics.pairwise import cosine_similarity

nlp = spacy.load("spacy.aravec.model")
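# NOTE (assumption): "spacy.aravec.model" is a locally packaged spaCy pipeline built from the AraVec
# Arabic word embeddings; the nlp(...).similarity(...) calls further down rely on its word vectors.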
#---------------------------------------------------------------------------------------------------------------
#---------------------------------------------- Side bar --------------------------------------------------------
#---------------------------------------------------------------------------------------------------------------
st.sidebar.markdown('مواقع اخباريه معتمده ')  # "Trusted news sources"
st.sidebar.markdown("[العربية](https://www.alarabiya.net/)")
st.sidebar.markdown("[الجزيرة نت](https://www.aljazeera.net/news/)")
st.sidebar.markdown("[وكالة الانباء الكويتية](https://www.kuna.net.kw/Default.aspx?language=ar)")
#---------------------------------------------------------------------------------------------------------------

st.write("""
Arabic headline news detection
""")

tx = st.text_input(''' الرجاء ادخال العنوان المراد التاكد من صحته ''')  # "Please enter the headline you want to verify"
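# Pipeline: the entered headline is cleaned (hashtags expanded, text normalised), sent to Google News
# (Arabic edition), and the returned titles/descriptions are compared with the headline via AraVec
# vector similarity; a best score of at least 0.85 marks the headline as real, otherwise as fake.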
#---------------------------------------------------------------------------------------------------------------
#---------------------------------------- Pre-processing functions ----------------------------------------------
#---------------------------------------------------------------------------------------------------------------
def clean_str(text):
    search = ["أ","إ","آ","ة","_","-","/",".","،"," و "," يا ",'"',"ـ","'","ى","\\",'\n', '\t','"','?','؟','!']
    replace = ["ا","ا","ا","ه"," "," ","","",""," و"," يا","","","","ي","",' ', ' ',' ',' ? ',' ؟ ',' ! ']

    # remove tashkeel (Arabic diacritics)
    p_tashkeel = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
    text = re.sub(p_tashkeel, "", text)

    # remove longation (collapse runs of a repeated letter to at most two)
    p_longation = re.compile(r'(.)\1+')
    subst = r"\1\1"
    text = re.sub(p_longation, subst, text)

    text = text.replace('وو', 'و')
    text = text.replace('يي', 'ي')
    text = text.replace('اا', 'ا')

    for i in range(0, len(search)):
        text = text.replace(search[i], replace[i])

    # trim
    text = text.strip()

    return text

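# Illustrative example (assumed input, not from the original file):
#   clean_str("أخباااار!")  ->  "اخبار !"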

def split_hashtag_to_words(tag):
    tag = tag.replace('#','')
    tags = tag.split('_')
    if len(tags) > 1:
        return tags
    pattern = re.compile(r"[A-Z][a-z]+|\d+|[A-Z]+(?![a-z])")
    return pattern.findall(tag)

def clean_hashtag(text):
    words = text.split()
    text = list()
    for word in words:
        if is_hashtag(word):
            text.extend(extract_hashtag(word))
        else:
            text.append(word)
    return " ".join(text)

def is_hashtag(word):
    if word.startswith("#"):
        return True
    else:
        return False

def extract_hashtag(text):
    hash_list = ([re.sub(r"(\W+)$", "", i) for i in text.split() if i.startswith("#")])
    word_list = []
    for word in hash_list:
        word_list.extend(split_hashtag_to_words(word))
    return word_list

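# Illustrative example (assumed input): clean_hashtag("عاجل #اخبار_الكويت") -> "عاجل اخبار الكويت"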

# Define the preprocessing Class
class Preprocessor:
    def __init__(self, tokenizer, **cfg):
        self.tokenizer = tokenizer

    def __call__(self, text):
        preprocessed = clean_str(text)
        return self.tokenizer(preprocessed)

#---------------------------------------------------------------------------------------------------------------
#----------------------------------------- END OF PRE-PROCESSING ------------------------------------------------
#---------------------------------------------------------------------------------------------------------------
# Apply the `Preprocessor` Class

nlp.tokenizer = Preprocessor(nlp.tokenizer)
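# With the tokenizer wrapped, every string passed to nlp() (the user's headline as well as each
# retrieved title and description) is run through clean_str before tokenization and vector lookup.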

if len(tx) != 0:
    googlenews = GoogleNews(lang='ar')
    googlenews.clear()

    f = 0
    Prediction = ''
    top_similar_ind = ''
    top_similar_news = ''
    medium = ''
    top_similar_ind2 = ''
    tp_desc = ''
    # defaults for the fields shown in the output section, so an empty search result cannot raise a NameError
    descr = ''
    date = ''
    link = ''

    st.markdown(f"Searching for: { tx }")
    st.markdown(f"ــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــــ")

    tx = clean_hashtag(tx)
    tx = clean_str(tx)

    googlenews.search(tx)
    result = googlenews.page_at(1)
    googlenews.clear()

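    # Each entry returned by page_at(1) is a dict; its 'title', 'desc', 'media', 'date' and 'link'
    # fields are used below.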
    if len(result) == 0:
        Prediction = 'الخبر زائف'  # "The news is fake"
        top_similar_news = 'لا يوجد اخبار مماثله'  # "No similar news found"
        medium = 'لا يوجد مصدر'  # "No source"
        tp_desc = 'لا يوجد وصف'  # "No description"
        descr = tp_desc  # the output section below displays descr

    else:
        result_text = {"Text":[]}

        # collect the titles returned by the Google News search
        for i in range(len(result)):
            title = result[i]['title']
            result_text['Text'].append(title)

        result_text2 = {"Text":[]}
        # collect the result descriptions
        for i in range(len(result)):
            desc = result[i]['desc']
            result_text2['Text'].append(desc)

        result_text = pd.DataFrame(result_text)
        result_text2 = pd.DataFrame(result_text2)

        data = pd.DataFrame()
        data['Text2'] = result_text['Text'].copy()

        data['Text2'] = data['Text2'].apply(lambda x: nlp(x).similarity(nlp(tx)))
        sg300top = data['Text2'].max(axis=0)

        top_similar_ind = np.argmax(data['Text2'])
        top_similar_news = result[top_similar_ind]['title']
        descr = result[top_similar_ind]['desc']
        medium = result[top_similar_ind]['media']
        date = result[top_similar_ind]['date']
        link = result[top_similar_ind]['link']

        data['Text3'] = result_text2['Text'].copy()
        data['Text3'] = data['Text3'].apply(lambda x: nlp(x).similarity(nlp(tx)))
        sg300top2 = data['Text3'].max(axis=0)
        top_similar_ind2 = np.argmax(data['Text3'])
        tp_desc = result[top_similar_ind2]['desc']

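        # Decision rule: the headline is labelled real if either the best title similarity or the
        # best description similarity reaches 0.85, otherwise it is labelled fake.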
        if sg300top >= .85 or sg300top2 >= .85:
            Prediction = 'الخبر صحيح'  # "The news is real"
        else:
            Prediction = 'الخبر زائف'  # "The news is fake"

    st.markdown(f"System Prediction : { Prediction }")
    st.markdown(f"الخبر المماثل: { top_similar_news }")  # "Most similar news"
    st.markdown(f"")
    st.markdown(f"تاريخ الخبر: { date }")  # "News date"
    st.markdown(f"")
    st.markdown(f"التوصيف: { descr }")  # "Description"
    st.markdown(f"")
    st.markdown(f"المصدر: { medium }")  # "Source"
    st.markdown(f"")
    st.markdown(f"رابط الخبر: { link }")  # "News link"

#st.markdown(f"Searching for: { tx }")