Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import time | |
| import threading | |
| from datetime import datetime | |
| import re | |
# Static HTML snippet that the article links are extracted from.
# Current list: 36 links. It is parsed by parse_urls_from_html(), which keeps
# only the anchors whose href starts with "/irfan-basaranoglu/".
HTML_CONTENT = """
<div class="yazarlar">
<ul class="liste arali">
<li><a href="/irfan-basaranoglu/tanri-gibi-hissetmek">TANRI GİBİ HİSSETMEK<time datetime="2025-10-31T03:34:00+03:00" style="display:block;padding-top:5px">31 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/tasli-tarla">TAŞLI TARLA<time datetime="2025-10-30T01:37:00+03:00" style="display:block;padding-top:5px">30 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/kaybolan-huzur">KAYBOLAN HUZUR<time datetime="2025-10-29T02:14:00+03:00" style="display:block;padding-top:5px">29 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/bu-kalp-sizi-unutur-mu">BU KALP SİZİ UNUTUR MU?<time datetime="2025-10-28T16:22:00+03:00" style="display:block;padding-top:5px">28 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/siz-de-eve-gideceksiniz">SİZ DE EVE GİDECEKSİNİZ<time datetime="2025-10-27T02:27:00+03:00" style="display:block;padding-top:5px">27 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/huzur-hakki">HUZUR HAKKI<time datetime="2025-10-26T01:05:00+03:00" style="display:block;padding-top:5px">26 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/sari-aganin-sari-traktoru">SARI AĞANIN SARI TRAKTÖRÜ<time datetime="2025-10-25T16:02:00+03:00" style="display:block;padding-top:5px">25 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/ruzgr-gulu">RÜZGÂR GÜLÜ<time datetime="2025-10-24T13:46:00+03:00" style="display:block;padding-top:5px">24 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/eski-bir-yara">ESKİ BİR YARA<time datetime="2025-10-23T00:32:00+03:00" style="display:block;padding-top:5px">23 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/mudur-osman">MÜDÜR OSMAN<time datetime="2025-10-22T13:42:00+03:00" style="display:block;padding-top:5px">22 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/bu-dunyadan-bir-veysel-gecti">BU DÜNYADAN BİR VEYSEL GEÇTİ<time datetime="2025-10-21T01:10:00+03:00" style="display:block;padding-top:5px">21 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/mukayese">MUKAYESE<time datetime="2025-10-20T02:23:00+03:00" style="display:block;padding-top:5px">20 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/marangoz-cemile">MARANGOZ CEMİLE<time datetime="2025-10-19T11:50:00+03:00" style="display:block;padding-top:5px">19 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/keske-kalbim-sende-yasayabilseydi">KEŞKE KALBİM SENDE YAŞAYABİLSEYDİ<time datetime="2025-10-18T07:22:00+03:00" style="display:block;padding-top:5px">18 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/lacivert-kaban">LACİVERT KABAN<time datetime="2025-10-17T12:38:00+03:00" style="display:block;padding-top:5px">17 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/kucuk-jandarma">KÜÇÜK JANDARMA<time datetime="2025-10-16T12:53:00+03:00" style="display:block;padding-top:5px">16 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/komur-karasi-2">KÖMÜR KARASI<time datetime="2025-10-15T06:54:00+03:00" style="display:block;padding-top:5px">15 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/komur-karasi-gozlerini-ozledim">KÖMÜR KARASI GÖZLERİNİ ÖZLEDİM<time datetime="2025-10-14T07:51:00+03:00" style="display:block;padding-top:5px">14 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/kirve">KİRVE<time datetime="2025-10-12T10:28:00+03:00" style="display:block;padding-top:5px">12 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/kirmizi-elma">KIRMIZI ELMA<time datetime="2025-10-10T23:30:00+03:00" style="display:block;padding-top:5px">10 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/kariyer-3">KARİYER<time datetime="2025-10-09T22:06:00+03:00" style="display:block;padding-top:5px">9 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/kinali-kuzu-2">KINALI KUZU<time datetime="2025-10-09T09:14:00+03:00" style="display:block;padding-top:5px">9 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/boyle-evlat-olmaz-olsun">BÖYLE EVLAT OLMAZ OLSUN<time datetime="2025-10-08T00:13:00+03:00" style="display:block;padding-top:5px">8 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/kanarya-sesli-kapi-zili">KANARYA SESLİ KAPI ZİLİ<time datetime="2025-10-07T13:50:00+03:00" style="display:block;padding-top:5px">7 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/kafamdaki-hapishane">KAFAMDAKİ HAPİSHANE<time datetime="2025-10-06T08:21:00+03:00" style="display:block;padding-top:5px">6 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/hicbir-lamba-gunes-kadar-aydinlatamaz">HİÇBİR LAMBA GÜNEŞ KADAR AYDINLATAMAZ<time datetime="2025-10-05T10:58:00+03:00" style="display:block;padding-top:5px">5 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/hayat-iste-simdi-basliyor">Hayat işte şimdi başlıyor<time datetime="2025-10-04T10:01:00+03:00" style="display:block;padding-top:5px">4 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/fenerin-bekcisi">FENERİN BEKÇİSİ<time datetime="2025-10-03T09:04:00+03:00" style="display:block;padding-top:5px">3 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/fatmanin-hayat-mucadelesi">FATMA'NIN HAYAT MÜCADELESİ<time datetime="2025-10-02T09:00:00+03:00" style="display:block;padding-top:5px">2 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/evinin-kaptani-ol">EVİNİN KAPTANI OL<time datetime="2025-10-01T08:39:00+03:00" style="display:block;padding-top:5px">1 Ekim 2025</time></a></li>
<li><a href="/irfan-basaranoglu/ese-nene">EŞE NENE<time datetime="2025-09-30T13:49:00+03:00" style="display:block;padding-top:5px">30 Eylül 2025</time></a></li>
<li><a href="/irfan-basaranoglu/emine-kadin">EMİNE KADIN<time datetime="2025-09-29T10:13:00+03:00" style="display:block;padding-top:5px">29 Eylül 2025</time></a></li>
<li><a href="/irfan-basaranoglu/degirmen">DEĞİRMEN<time datetime="2025-09-28T14:20:00+03:00" style="display:block;padding-top:5px">28 Eylül 2025</time></a></li>
<li><a href="/irfan-basaranoglu/colun-ortasinda-bir-vaha">Çölün ortasında bir vaha<time datetime="2025-09-27T11:20:00+03:00" style="display:block;padding-top:5px">27 Eylül 2025</time></a></li>
<li><a href="/irfan-basaranoglu/cedene">ÇEDENE<time datetime="2025-09-26T09:42:00+03:00" style="display:block;padding-top:5px">26 Eylül 2025</time></a></li>
<li><a href="/irfan-basaranoglu/cemal-2">CEMAL<time datetime="2025-09-25T10:32:00+03:00" style="display:block;padding-top:5px">25 Eylül 2025</time></a></li>
</ul>
</div>
"""
# Base URL of the real site. start_scraping() overwrites this with the value
# entered in the UI, and parse_urls_from_html() prepends it to every href.
BASE_URL = "https://malatyayenises.com"

# Module-level state shared by the UI callbacks and the worker thread.
is_running = False      # loop flag read by continuous_scraping()
scraping_thread = None  # Thread object created by start_scraping()
status_log = []         # human-readable log lines (trimmed to the last 100)
def parse_urls_from_html(html_content):
    """Extract the article links from an HTML fragment.

    Only anchors whose ``href`` starts with ``/irfan-basaranoglu/`` are kept.
    The visible date inside the nested ``<time>`` element is removed from the
    link text so that only the article title remains.

    Args:
        html_content: HTML markup containing ``<a href="...">`` elements.

    Returns:
        A list of dicts with the keys ``'url'`` (absolute URL built from the
        module-level ``BASE_URL``), ``'title'`` and ``'path'`` (the raw href),
        in document order.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # BASE_URL is user-editable; drop a trailing slash so that joining it with
    # an absolute path never produces "https://host//path".
    base = BASE_URL.rstrip('/')

    urls = []
    for link in soup.find_all('a', href=True):
        href = link.get('href')
        # Skip unrelated links before doing any title work.
        if not href or not href.startswith('/irfan-basaranoglu/'):
            continue

        text = link.get_text(strip=True)
        # The anchor text also contains the date from the nested <time> tag.
        time_elem = link.find('time')
        if time_elem:
            text = text.replace(time_elem.get_text(strip=True), '').strip()

        urls.append({
            'url': base + href,
            'title': text,
            'path': href,
        })
    return urls
def scrape_page(url_info):
    """Fetch a single page and return a small summary of it.

    Args:
        url_info: Dict with at least ``'url'`` (address to fetch) and
            ``'title'`` (fallback title used when the page has no <title>).

    Returns:
        On success a dict with ``'success': True``, ``'url'``, ``'title'``,
        ``'preview'`` (first 500 characters of the main content) and
        ``'timestamp'``. On any exception a dict with ``'success': False``,
        ``'url'``, ``'error'`` (the exception text) and ``'timestamp'``.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    try:
        response = requests.get(url_info['url'], timeout=10, headers=request_headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Page title, falling back to the title taken from the link list.
        title_tag = soup.find('title')
        if title_tag:
            title_text = title_tag.get_text(strip=True)
        else:
            title_text = url_info['title']

        # Main content: first match among <article>, div.content and <main>.
        content = soup.find('article') or soup.find('div', class_='content') or soup.find('main')
        if content:
            content_text = content.get_text(strip=True)[:500]
        else:
            content_text = "İçerik bulunamadı"

        summary = dict(success=True, url=url_info['url'])
        summary['title'] = title_text
        summary['preview'] = content_text
        summary['timestamp'] = datetime.now().strftime(stamp_format)
        return summary
    except Exception as e:
        # Best effort: report the failure to the caller instead of raising.
        failure = dict(success=False, url=url_info['url'])
        failure['error'] = str(e)
        failure['timestamp'] = datetime.now().strftime(stamp_format)
        return failure
def continuous_scraping(interval_seconds, url_list, status_callback=None):
    """Visit the given pages round-robin until scraping is stopped.

    Runs while the module-level ``is_running`` flag is true. Each visit is
    appended to ``status_log`` (trimmed to the last 100 entries) and the last
    20 lines are passed to ``status_callback`` when one is given.

    Compared with a plain ``time.sleep(interval_seconds)`` loop this version:

    * waits in short slices, so a stop request takes effect within about half
      a second instead of after the full interval;
    * exits when ``scraping_thread`` no longer refers to the thread running
      it, so a worker left over from a previous run cannot resume after a
      quick stop/start and visit pages in parallel with the new worker.

    Args:
        interval_seconds: Pause between two visits, in seconds.
        url_list: List of dicts as produced by ``parse_urls_from_html``.
        status_callback: Optional callable receiving the joined log tail.
    """
    me = threading.current_thread()
    # Ownership is only enforced when this call is the registered worker;
    # a direct call from another thread keeps the plain is_running contract.
    launched_as_worker = scraping_thread is me

    def superseded():
        # True once start_scraping() has registered a newer worker thread.
        return launched_as_worker and scraping_thread is not me

    def still_active():
        return is_running and not superseded()

    def wait(seconds):
        # Sleep in small slices so stop/restart is noticed promptly.
        deadline = time.monotonic() + seconds
        while still_active():
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return
            time.sleep(min(0.5, remaining))

    url_index = 0
    while still_active():
        if not url_list:
            wait(1)
            continue

        url_info = url_list[url_index % len(url_list)]
        result = scrape_page(url_info)
        if superseded():
            # A new run owns status_log now; do not write stale entries.
            break

        status_msg = f"[{result['timestamp']}] Ziyaret: {url_info['title'][:50]}..."
        if result['success']:
            status_msg += " ✓ Başarılı"
        else:
            status_msg += f" ✗ Hata: {result.get('error', 'Bilinmeyen')}"

        status_log.append(status_msg)
        # Keep only the last 100 entries (in place: the list is shared).
        del status_log[:-100]

        if status_callback:
            status_callback("\n".join(status_log[-20:]))  # show last 20 lines

        url_index += 1
        wait(interval_seconds)
def start_scraping(base_url, interval):
    """Start the background scraping thread.

    Args:
        base_url: Site root entered in the UI. Surrounding whitespace and a
            trailing slash are removed; an empty value falls back to
            ``https://malatyayenises.com``.
        interval: Pause between two page visits, in seconds.

    Returns:
        A ``(status_message, log_text)`` tuple for the two UI text boxes.
    """
    global is_running, scraping_thread, BASE_URL

    if is_running:
        return "Zaten çalışıyor!", "\n".join(status_log[-20:]) if status_log else "Henüz kayıt yok."

    # Normalise the user-supplied base URL: "  https://host/ " must not end up
    # as an unusable address or as "https://host//path" once joined.
    cleaned_base = (base_url or "").strip().rstrip("/")
    BASE_URL = cleaned_base or "https://malatyayenises.com"

    # Build the URL list from the embedded HTML snippet.
    url_list = parse_urls_from_html(HTML_CONTENT)
    if not url_list:
        return "URL bulunamadı!", "HTML içeriğinden link çıkarılamadı."

    is_running = True
    status_log.clear()

    # No status callback is passed: the UI polls status_log on its own.
    scraping_thread = threading.Thread(
        target=continuous_scraping,
        args=(interval, url_list),
        daemon=True,
    )
    scraping_thread.start()

    started_msg = (
        f"Scraping başlatıldı! {len(url_list)} sayfa izleniyor. "
        f"Interval: {interval} saniye."
    )
    return started_msg, "\n".join(status_log[-20:]) if status_log else "Başlatılıyor..."
def stop_scraping():
    """Ask the scraping loop to stop.

    Clears the module-level ``is_running`` flag when it is set.

    Returns:
        A ``(status_message, log_text)`` tuple; ``log_text`` is the last 20
        log lines, or a placeholder when the log is empty.
    """
    global is_running

    log_tail = "\n".join(status_log[-20:]) if status_log else None

    if is_running:
        is_running = False
        message, placeholder = "Scraping durduruldu.", "Durduruldu."
    else:
        message, placeholder = "Zaten durmuş!", "Henüz kayıt yok."

    if log_tail is None:
        return message, placeholder
    return message, log_tail
def get_status():
    """Return the current run state and the tail of the log.

    Returns:
        A ``(status_message, log_text)`` tuple. ``status_message`` is
        ``"Durum: Çalışıyor"`` while the loop flag is set and
        ``"Durum: Çalışmıyor"`` otherwise; ``log_text`` is the last 20 log
        lines, or a placeholder when the log is empty.
    """
    # The stopped label used to be "Durmuyor" ("is not stopping"), which says
    # the opposite of what is meant; "Çalışmıyor" means "not running".
    status_text = "Çalışıyor" if is_running else "Çalışmıyor"
    log_text = "\n".join(status_log[-20:]) if status_log else "Henüz kayıt yok."
    return f"Durum: {status_text}", log_text
# Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for controlling the scraper.

    The left column holds the base URL box, the interval slider, the
    start/stop/status buttons and a status message box; the right column
    holds the log box. The log box is refreshed every 3 seconds by a poller
    registered on page load.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(title="Babam Yazılar - Sürekli Sayfa Gezici") as demo:
        gr.Markdown("# 📚 Babam Yazılar - Sürekli Sayfa Gezici")
        gr.Markdown("Bu uygulama, belirtilen sayfaları sürekli olarak ziyaret eder ve durumu gösterir.")

        with gr.Row():
            with gr.Column():
                base_url_input = gr.Textbox(
                    label="Base URL",
                    value="https://malatyayenises.com",
                    placeholder="Örn: https://malatyayenises.com",
                )
                interval_input = gr.Slider(
                    label="Ziyaret Aralığı (saniye)",
                    minimum=1,
                    maximum=300,
                    value=10,
                    step=1,
                )
                with gr.Row():
                    start_btn = gr.Button("Başlat", variant="primary")
                    stop_btn = gr.Button("Durdur", variant="stop")
                    status_btn = gr.Button("Durum", variant="secondary")
                status_output = gr.Textbox(
                    label="Durum Mesajı",
                    interactive=False,
                    lines=2,
                )
            with gr.Column():
                log_output = gr.Textbox(
                    label="İşlem Logları",
                    interactive=False,
                    lines=20,
                    max_lines=20,
                )

        # Button actions
        def start_action(base_url, interval):
            # The slider value is cast to int before it is used as the interval.
            return start_scraping(base_url, int(interval))

        start_btn.click(
            fn=start_action,
            inputs=[base_url_input, interval_input],
            outputs=[status_output, log_output],
        )
        stop_btn.click(
            fn=stop_scraping,
            outputs=[status_output, log_output],
        )
        status_btn.click(
            fn=get_status,
            outputs=[status_output, log_output],
        )

        # Automatic refresh
        def update_logs():
            """Return the last 20 log lines, or a placeholder when empty."""
            if status_log:
                return "\n".join(status_log[-20:])
            return "Henüz kayıt yok."

        # One poller, started on page load, refreshes the log every 3 seconds.
        # The former log_output.change(..., outputs=[log_output], every=3)
        # listener was removed: it wrote to the component that triggers it and
        # duplicated this poller.
        demo.load(
            fn=update_logs,
            outputs=[log_output],
            every=3,
        )

    return demo
if __name__ == "__main__":
    # Build the UI and serve it on all network interfaces, port 7860.
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)