File size: 14,823 Bytes
b95e413
 
 
 
 
 
 
 
d649f39
b95e413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d649f39
 
b95e413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
import gradio as gr
import requests
from bs4 import BeautifulSoup
import time
import threading
from datetime import datetime
import re

# Author-page HTML snippet that the article links are extracted from - current list (36 links)
HTML_CONTENT = """
<div class="yazarlar">
    <ul class="liste arali">
        <li><a href="/irfan-basaranoglu/tanri-gibi-hissetmek">TANRI GİBİ HİSSETMEK<time datetime="2025-10-31T03:34:00+03:00" style="display:block;padding-top:5px">31 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/tasli-tarla">TAŞLI TARLA<time datetime="2025-10-30T01:37:00+03:00" style="display:block;padding-top:5px">30 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/kaybolan-huzur">KAYBOLAN HUZUR<time datetime="2025-10-29T02:14:00+03:00" style="display:block;padding-top:5px">29 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/bu-kalp-sizi-unutur-mu">BU KALP SİZİ UNUTUR MU?<time datetime="2025-10-28T16:22:00+03:00" style="display:block;padding-top:5px">28 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/siz-de-eve-gideceksiniz">SİZ DE EVE GİDECEKSİNİZ<time datetime="2025-10-27T02:27:00+03:00" style="display:block;padding-top:5px">27 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/huzur-hakki">HUZUR HAKKI<time datetime="2025-10-26T01:05:00+03:00" style="display:block;padding-top:5px">26 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/sari-aganin-sari-traktoru">SARI AĞANIN SARI TRAKTÖRÜ<time datetime="2025-10-25T16:02:00+03:00" style="display:block;padding-top:5px">25 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/ruzgr-gulu">RÜZGÂR GÜLÜ<time datetime="2025-10-24T13:46:00+03:00" style="display:block;padding-top:5px">24 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/eski-bir-yara">ESKİ BİR YARA<time datetime="2025-10-23T00:32:00+03:00" style="display:block;padding-top:5px">23 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/mudur-osman">MÜDÜR OSMAN<time datetime="2025-10-22T13:42:00+03:00" style="display:block;padding-top:5px">22 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/bu-dunyadan-bir-veysel-gecti">BU DÜNYADAN BİR VEYSEL GEÇTİ<time datetime="2025-10-21T01:10:00+03:00" style="display:block;padding-top:5px">21 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/mukayese">MUKAYESE<time datetime="2025-10-20T02:23:00+03:00" style="display:block;padding-top:5px">20 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/marangoz-cemile">MARANGOZ CEMİLE<time datetime="2025-10-19T11:50:00+03:00" style="display:block;padding-top:5px">19 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/keske-kalbim-sende-yasayabilseydi">KEŞKE KALBİM SENDE YAŞAYABİLSEYDİ<time datetime="2025-10-18T07:22:00+03:00" style="display:block;padding-top:5px">18 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/lacivert-kaban">LACİVERT KABAN<time datetime="2025-10-17T12:38:00+03:00" style="display:block;padding-top:5px">17 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/kucuk-jandarma">KÜÇÜK JANDARMA<time datetime="2025-10-16T12:53:00+03:00" style="display:block;padding-top:5px">16 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/komur-karasi-2">KÖMÜR KARASI<time datetime="2025-10-15T06:54:00+03:00" style="display:block;padding-top:5px">15 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/komur-karasi-gozlerini-ozledim">KÖMÜR KARASI GÖZLERİNİ ÖZLEDİM<time datetime="2025-10-14T07:51:00+03:00" style="display:block;padding-top:5px">14 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/kirve">KİRVE<time datetime="2025-10-12T10:28:00+03:00" style="display:block;padding-top:5px">12 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/kirmizi-elma">KIRMIZI ELMA<time datetime="2025-10-10T23:30:00+03:00" style="display:block;padding-top:5px">10 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/kariyer-3">KARİYER<time datetime="2025-10-09T22:06:00+03:00" style="display:block;padding-top:5px">9 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/kinali-kuzu-2">KINALI KUZU<time datetime="2025-10-09T09:14:00+03:00" style="display:block;padding-top:5px">9 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/boyle-evlat-olmaz-olsun">BÖYLE EVLAT OLMAZ OLSUN<time datetime="2025-10-08T00:13:00+03:00" style="display:block;padding-top:5px">8 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/kanarya-sesli-kapi-zili">KANARYA SESLİ KAPI ZİLİ<time datetime="2025-10-07T13:50:00+03:00" style="display:block;padding-top:5px">7 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/kafamdaki-hapishane">KAFAMDAKİ HAPİSHANE<time datetime="2025-10-06T08:21:00+03:00" style="display:block;padding-top:5px">6 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/hicbir-lamba-gunes-kadar-aydinlatamaz">HİÇBİR LAMBA GÜNEŞ KADAR AYDINLATAMAZ<time datetime="2025-10-05T10:58:00+03:00" style="display:block;padding-top:5px">5 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/hayat-iste-simdi-basliyor">Hayat işte şimdi başlıyor<time datetime="2025-10-04T10:01:00+03:00" style="display:block;padding-top:5px">4 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/fenerin-bekcisi">FENERİN BEKÇİSİ<time datetime="2025-10-03T09:04:00+03:00" style="display:block;padding-top:5px">3 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/fatmanin-hayat-mucadelesi">FATMA&#39;NIN HAYAT MÜCADELESİ<time datetime="2025-10-02T09:00:00+03:00" style="display:block;padding-top:5px">2 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/evinin-kaptani-ol">EVİNİN KAPTANI OL<time datetime="2025-10-01T08:39:00+03:00" style="display:block;padding-top:5px">1 Ekim 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/ese-nene">EŞE NENE<time datetime="2025-09-30T13:49:00+03:00" style="display:block;padding-top:5px">30 Eylül 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/emine-kadin">EMİNE KADIN<time datetime="2025-09-29T10:13:00+03:00" style="display:block;padding-top:5px">29 Eylül 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/degirmen">DEĞİRMEN<time datetime="2025-09-28T14:20:00+03:00" style="display:block;padding-top:5px">28 Eylül 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/colun-ortasinda-bir-vaha">Çölün ortasında bir vaha<time datetime="2025-09-27T11:20:00+03:00" style="display:block;padding-top:5px">27 Eylül 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/cedene">ÇEDENE<time datetime="2025-09-26T09:42:00+03:00" style="display:block;padding-top:5px">26 Eylül 2025</time></a></li>
        <li><a href="/irfan-basaranoglu/cemal-2">CEMAL<time datetime="2025-09-25T10:32:00+03:00" style="display:block;padding-top:5px">25 Eylül 2025</time></a></li>
    </ul>
</div>
"""

# Base URL of the real site; the relative article paths are appended to it
BASE_URL = "https://malatyayenises.com"

# Module-level state shared by the UI callbacks and the background worker:
# is_running      - flag the worker loop polls to know whether to keep going
# scraping_thread - the most recently started worker thread (None before first start)
# status_log      - human-readable log lines appended by the worker
is_running = False
scraping_thread = None
status_log = []

def parse_urls_from_html(html_content):
    """Extract the article links from an HTML fragment.

    Only anchors whose ``href`` starts with ``/irfan-basaranoglu/`` are kept.

    Args:
        html_content: HTML markup containing ``<a href=...>`` elements.

    Returns:
        A list of dicts, one per matching anchor, with the keys ``url``
        (``BASE_URL`` followed by the path), ``title`` (the anchor text
        without the text of nested ``<time>`` elements) and ``path`` (the
        original ``href`` value).
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # A base URL entered with a trailing slash must not produce "//" when the
    # absolute path is appended.
    base = BASE_URL.rstrip('/')
    urls = []

    for link in soup.find_all('a', href=True):
        href = link.get('href')
        if not href or not href.startswith('/irfan-basaranoglu/'):
            continue

        # Remove the <time> nodes themselves instead of string-replacing their
        # text, so a title that happens to contain the date text stays intact.
        for time_elem in link.find_all('time'):
            time_elem.decompose()

        urls.append({
            'url': base + href,
            'title': link.get_text(strip=True),
            'path': href,
        })

    return urls

def scrape_page(url_info):
    """Fetch one page and summarise the outcome as a dict.

    Args:
        url_info: Dict with at least the keys ``url`` (address to fetch) and
            ``title`` (fallback title when the page has no ``<title>``).

    Returns:
        On success: ``success=True`` plus ``url``, ``title``, ``preview``
        (first 500 characters of the main content text) and ``timestamp``.
        On any exception: ``success=False`` plus ``url``, ``error`` (the
        exception text) and ``timestamp``.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    try:
        response = requests.get(url_info['url'], timeout=10, headers=request_headers)
        response.raise_for_status()

        page = BeautifulSoup(response.content, 'html.parser')

        # Prefer the document's own <title>; fall back to the link text
        title_tag = page.find('title')
        if title_tag:
            page_title = title_tag.get_text(strip=True)
        else:
            page_title = url_info['title']

        # Main content container: first of <article>, div.content, <main>
        body = (
            page.find('article')
            or page.find('div', class_='content')
            or page.find('main')
        )
        if body:
            preview = body.get_text(strip=True)[:500]
        else:
            preview = "İçerik bulunamadı"

        return {
            'success': True,
            'url': url_info['url'],
            'title': page_title,
            'preview': preview,
            'timestamp': datetime.now().strftime(stamp_format)
        }
    except Exception as exc:
        # Best-effort: every failure is reported to the caller, never raised
        return {
            'success': False,
            'url': url_info['url'],
            'error': str(exc),
            'timestamp': datetime.now().strftime(stamp_format)
        }

def _sleep_while_running(seconds):
    """Sleep up to *seconds*, returning early once ``is_running`` is false."""
    if seconds < 0:
        # Keep the failure time.sleep() itself reports for a negative length
        raise ValueError("sleep length must be non-negative")
    deadline = time.monotonic() + seconds
    while is_running:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Short slices so a stop request is noticed within about half a second
        time.sleep(min(remaining, 0.5))


def continuous_scraping(interval_seconds, url_list, status_callback=None):
    """Visit the given pages round-robin until ``is_running`` becomes false.

    Each visit appends one line to the module-level ``status_log`` (capped at
    the most recent 100 entries).

    Args:
        interval_seconds: Non-negative pause between two visits, in seconds.
        url_list: List of dicts as produced by ``parse_urls_from_html``.
        status_callback: Optional callable that receives the last 20 log
            lines joined by newlines after every visit.
    """
    url_index = 0
    while is_running:
        if not url_list:
            # Nothing to visit; idle briefly before re-checking the flag
            time.sleep(1)
            continue

        url_info = url_list[url_index % len(url_list)]
        result = scrape_page(url_info)

        status_msg = f"[{result['timestamp']}] Ziyaret: {url_info['title'][:50]}..."
        if result['success']:
            status_msg += " ✓ Başarılı"
        else:
            status_msg += f" ✗ Hata: {result.get('error', 'Bilinmeyen')}"

        status_log.append(status_msg)
        # Keep only the last 100 entries; in place, because the list is shared
        del status_log[:-100]

        if status_callback:
            status_callback("\n".join(status_log[-20:]))  # show the last 20 lines

        url_index += 1
        # Interruptible wait: a plain sleep would delay a stop request by up
        # to the whole interval and keep this worker alive meanwhile.
        _sleep_while_running(interval_seconds)

def start_scraping(base_url, interval):
    """Start the background worker that visits the pages in a loop.

    Args:
        base_url: Site root the relative article paths are appended to. Empty
            or blank values fall back to ``https://malatyayenises.com``.
        interval: Pause between two visits, in seconds.

    Returns:
        A ``(message, log_text)`` tuple of strings for the UI.
    """
    global is_running, scraping_thread, BASE_URL

    if is_running:
        return "Zaten çalışıyor!", "\n".join(status_log[-20:]) if status_log else "Henüz kayıt yok."

    # A worker that was just stopped may still be inside a request or a sleep.
    # Starting a second one now would leave two workers running once the flag
    # is set again, so give it a moment to exit and refuse otherwise.
    if scraping_thread is not None and scraping_thread.is_alive():
        scraping_thread.join(timeout=1.0)
        if scraping_thread.is_alive():
            return (
                "Önceki işlem hâlâ durduruluyor, lütfen tekrar deneyin.",
                "\n".join(status_log[-20:]) if status_log else "Henüz kayıt yok.",
            )

    # Normalise the base URL: surrounding whitespace and a trailing slash
    # would otherwise end up inside every generated address.
    BASE_URL = (base_url or "").strip().rstrip('/') or "https://malatyayenises.com"

    # Build the list of pages to visit
    url_list = parse_urls_from_html(HTML_CONTENT)

    if not url_list:
        return "URL bulunamadı!", "HTML içeriğinden link çıkarılamadı."

    is_running = True
    status_log.clear()

    # The UI polls status_log itself, so no status callback is passed.
    scraping_thread = threading.Thread(
        target=continuous_scraping,
        args=(interval, url_list),
        daemon=True
    )
    scraping_thread.start()

    return f"Scraping başlatıldı! {len(url_list)} sayfa izleniyor. Interval: {interval} saniye.", "\n".join(status_log[-20:]) if status_log else "Başlatılıyor..."

def stop_scraping():
    """Ask the background worker to stop by clearing ``is_running``.

    Returns:
        A ``(message, log_text)`` tuple of strings; ``log_text`` is the last
        20 log lines, or a placeholder when the log is empty.
    """
    global is_running

    if is_running:
        is_running = False
        message, placeholder = "Scraping durduruldu.", "Durduruldu."
    else:
        message, placeholder = "Zaten durmuş!", "Henüz kayıt yok."

    if status_log:
        log_text = "\n".join(status_log[-20:])
    else:
        log_text = placeholder

    return message, log_text

def get_status():
    """Return the current worker state and the recent log.

    Returns:
        A ``(status_line, log_text)`` tuple of strings; ``log_text`` is the
        last 20 log lines, or a placeholder when the log is empty.
    """
    # "Durmuyor" ("is not stopping") described the idle state with the
    # opposite meaning; "Çalışmıyor" ("not running") is the intended label.
    status_text = "Çalışıyor" if is_running else "Çalışmıyor"
    log_text = "\n".join(status_log[-20:]) if status_log else "Henüz kayıt yok."

    return f"Durum: {status_text}", log_text

# Gradio Interface
def create_interface():
    """Build the Gradio UI for starting, stopping and monitoring the worker.

    Returns:
        The ``gr.Blocks`` app, with the buttons wired to ``start_scraping``,
        ``stop_scraping`` and ``get_status`` and a periodic log refresh
        registered on page load.
    """
    with gr.Blocks(title="Babam Yazılar - Sürekli Sayfa Gezici") as demo:
        gr.Markdown("# 📚 Babam Yazılar - Sürekli Sayfa Gezici")
        gr.Markdown("Bu uygulama, belirtilen sayfaları sürekli olarak ziyaret eder ve durumu gösterir.")

        with gr.Row():
            with gr.Column():
                base_url_input = gr.Textbox(
                    label="Base URL",
                    value="https://malatyayenises.com",
                    placeholder="Örn: https://malatyayenises.com"
                )
                interval_input = gr.Slider(
                    label="Ziyaret Aralığı (saniye)",
                    minimum=1,
                    maximum=300,
                    value=10,
                    step=1
                )

                with gr.Row():
                    start_btn = gr.Button("Başlat", variant="primary")
                    stop_btn = gr.Button("Durdur", variant="stop")
                    status_btn = gr.Button("Durum", variant="secondary")

                status_output = gr.Textbox(
                    label="Durum Mesajı",
                    interactive=False,
                    lines=2
                )

            with gr.Column():
                log_output = gr.Textbox(
                    label="İşlem Logları",
                    interactive=False,
                    lines=20,
                    max_lines=20
                )

        # Button actions
        def start_action(base_url, interval):
            # The slider value is converted to a whole number of seconds
            return start_scraping(base_url, int(interval))

        start_btn.click(
            fn=start_action,
            inputs=[base_url_input, interval_input],
            outputs=[status_output, log_output]
        )

        # These callbacks already return (message, log), so no wrapper is needed
        stop_btn.click(
            fn=stop_scraping,
            outputs=[status_output, log_output]
        )

        status_btn.click(
            fn=get_status,
            outputs=[status_output, log_output]
        )

        # Automatic refresh
        def update_logs():
            """Return the last 20 log lines, or a placeholder when empty."""
            if status_log:
                return "\n".join(status_log[-20:])
            return "Henüz kayıt yok."

        demo.load(
            fn=update_logs,
            outputs=[log_output],
            every=3  # refresh every 3 seconds
        )

        # No change-listener on log_output: one that writes back to log_output
        # re-triggers itself on every refresh and only duplicates the periodic
        # load event above.

    return demo

if __name__ == "__main__":
    # Build the UI and serve it on all network interfaces, port 7860
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)