Spaces:
Sleeping
Sleeping
updated news parsing
Browse files
- app.py +12 -16
- wna_googlenews.py +1 -2
app.py
CHANGED
|
@@ -46,37 +46,33 @@ with st.sidebar:
|
|
| 46 |
|
| 47 |
|
| 48 |
if st.button("Search"):
|
|
|
|
| 49 |
# display a loading progress
|
| 50 |
with st.spinner("Loading last news ..."):
|
| 51 |
-
|
| 52 |
-
|
| 53 |
with st.spinner("Processing received news ..."):
|
| 54 |
-
|
| 55 |
-
# get each title colums
|
| 56 |
-
sentences = df["title"]
|
| 57 |
-
# convert into array
|
| 58 |
-
sentences = sentences.tolist()
|
| 59 |
-
# st.write(sentences)
|
| 60 |
-
# create new dataframe
|
| 61 |
-
df = pd.DataFrame(columns=["sentence", "best","second"])
|
| 62 |
# loop on each sentence and call classifier
|
| 63 |
-
for
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
| 66 |
cur_result = model_outputs[0]
|
| 67 |
#st.write(cur_result)
|
| 68 |
# get label 1
|
| 69 |
label = cur_result[0]['label']
|
| 70 |
score = cur_result[0]['score']
|
| 71 |
percentage = round(score * 100, 2)
|
| 72 |
-
str1 = label + " " + str(percentage)
|
| 73 |
# get label 2
|
| 74 |
label = cur_result[1]['label']
|
| 75 |
score = cur_result[1]['score']
|
| 76 |
percentage = round(score * 100, 2)
|
| 77 |
-
str2 = label + " " + str(percentage)
|
| 78 |
# insert cur_sentence and cur_result into dataframe
|
| 79 |
-
df.loc[len(df.index)] = [cur_sentence, str1, str2]
|
| 80 |
|
| 81 |
# write info on the output
|
| 82 |
st.write("Number of sentences:", len(df))
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
if st.button("Search"):
|
| 49 |
+
classifier = pipeline(task="text-classification", model=settings["model"], top_k=None)
|
| 50 |
# display a loading progress
|
| 51 |
with st.spinner("Loading last news ..."):
|
| 52 |
+
allnews = wna.get_news(settings, query)
|
| 53 |
+
st.dataframe(allnews)
|
| 54 |
with st.spinner("Processing received news ..."):
|
| 55 |
+
df = pd.DataFrame(columns=["sentence", "date","best","second"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
# loop on each sentence and call classifier
|
| 57 |
+
for curnews in allnews:
|
| 58 |
+
#st.write(curnews)
|
| 59 |
+
cur_sentence = curnews["title"]
|
| 60 |
+
cur_date = curnews["date"]
|
| 61 |
+
model_outputs = classifier(cur_sentence)
|
| 62 |
cur_result = model_outputs[0]
|
| 63 |
#st.write(cur_result)
|
| 64 |
# get label 1
|
| 65 |
label = cur_result[0]['label']
|
| 66 |
score = cur_result[0]['score']
|
| 67 |
percentage = round(score * 100, 2)
|
| 68 |
+
str1 = label + " (" + str(percentage) + ")%"
|
| 69 |
# get label 2
|
| 70 |
label = cur_result[1]['label']
|
| 71 |
score = cur_result[1]['score']
|
| 72 |
percentage = round(score * 100, 2)
|
| 73 |
+
str2 = label + " (" + str(percentage) + ")%"
|
| 74 |
# insert cur_sentence and cur_result into dataframe
|
| 75 |
+
df.loc[len(df.index)] = [cur_sentence, cur_date, str1, str2]
|
| 76 |
|
| 77 |
# write info on the output
|
| 78 |
st.write("Number of sentences:", len(df))
|
wna_googlenews.py
CHANGED
|
@@ -19,5 +19,4 @@ def get_news(settings, query):
|
|
| 19 |
page_result = googlenews.page_at(page)
|
| 20 |
# merge dat
|
| 21 |
final_list = final_list + page_result
|
| 22 |
-
|
| 23 |
-
return df
|
|
|
|
| 19 |
page_result = googlenews.page_at(page)
|
| 20 |
# merge dat
|
| 21 |
final_list = final_list + page_result
|
| 22 |
+
return final_list
|
|
|