File size: 1,111 Bytes
55af729
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import re
import json
import streamlit as st
from scraping_needs import get_content

@st.cache_data(ttl=7800) # cache clears after 7800s
def trends_mwn(user_agent):
    """Return the headline cards published by Morocco World News.

    The headlines endpoint is fetched through ``get_content`` and parsed as
    JSON. For every card, the publication year and month are read from the
    URL of one of its sized thumbnails and combined with the post ID and slug
    to build the article link.

    Args:
        user_agent: Forwarded unchanged to ``get_content``.

    Returns:
        A list of dicts, one per usable card, each with the keys
        ``"image_link"``, ``"title"`` and ``"article_link"``. Cards whose
        thumbnail URL is missing or contains no year/month are skipped,
        because no article link can be built for them.

    Raises:
        KeyError: If a card lacks ``"thumb"``, ``"post_title"``, ``"ID"`` or
            ``"post_name"``.
        ValueError: If the fetched payload is not valid JSON (raised by
            ``json.loads``).
    """
    # page uses javascript
    str_main_list = get_content(
        "https://www.moroccoworldnews.com/home/post/zheadlines",
        user_agent,
        1,
    )  # we want text not html

    main_list = json.loads(str_main_list)

    # Raw strings keep the patterns identical to the previous ones while
    # avoiding invalid escape sequences; compiled once, outside the loop.
    year_pattern = re.compile(r'20\d\d(?=[\\/])')
    month_pattern = re.compile(r'(?<=[\\/])\d{1,2}(?=[\\/])')

    cards_content = []

    for card in main_list:
        image_link = card["thumb"]
        title = card["post_title"]

        # Same preference order as before: tsize, then msize, then lsize.
        # Empty values fall through to the next size.
        sized_url = next(
            (card[key] for key in ("tsize", "msize", "lsize") if card.get(key)),
            None,
        )
        if not isinstance(sized_url, str):
            # No usable thumbnail URL, so the publication date is unknown.
            continue

        year_match = year_pattern.search(sized_url)
        month_match = month_pattern.search(sized_url)
        if year_match is None or month_match is None:
            # The URL does not carry a year/month; skip this card instead of
            # failing the whole feed.
            continue

        year = year_match.group()
        month = month_match.group()
        post_id = card["ID"]
        post_name = card["post_name"]

        cards_content.append(
            {
                "image_link": image_link,
                "title": title,
                "article_link": (
                    f"https://www.moroccoworldnews.com/{year}/{month}/{post_id}/{post_name}"
                ),
            }
        )

    return cards_content