"""FastAPI service that scrapes tweets, profiles and searches from X (Twitter).

Pages are fetched through Scrapling's ``StealthyFetcher`` and parsed with CSS
selectors that target X's ``data-testid`` attributes.
"""

import os
import asyncio
import logging
import re
import urllib.parse
from typing import Optional

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

logger = logging.getLogger(__name__)

app = FastAPI(title="X Scraper API", version="1.0.0")

# Allow requests coming from the browser extension.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Hosts (and their subdomains) that the scrape endpoints are allowed to fetch.
_ALLOWED_HOSTS = ("x.com", "twitter.com")

# Base used to resolve relative tweet links found in the page.
_X_BASE_URL = "https://x.com"

# Extracts the numeric tweet id from a ".../status/<id>" link.
_STATUS_ID_RE = re.compile(r"/status/(\d+)")


# --- Models ---

class ScrapeRequest(BaseModel):
    """Request body for the endpoints that scrape a given URL."""

    url: str
    cookies: Optional[str] = None  # X login cookie (optional)


class SearchRequest(BaseModel):
    """Request body for the search endpoint."""

    query: str
    cookies: Optional[str] = None


# --- Scrapling lazy import (heavy library) ---

def get_fetcher(cookies: Optional[str] = None):
    """Return Scrapling's ``StealthyFetcher`` class, importing it on first use.

    Args:
        cookies: Accepted for interface compatibility; currently unused.
            Cookies are forwarded as a request header by ``_fetch_page``.
    """
    from scrapling.fetchers import StealthyFetcher

    return StealthyFetcher


# --- Helpers ---

def _is_x_url(url: str) -> bool:
    """Return True when *url* is an http(s) URL hosted on X/Twitter.

    The hostname is parsed and compared instead of doing a substring search,
    so that e.g. ``https://netflix.com/`` or ``https://evil.example/?q=x.com``
    are rejected.
    """
    try:
        parts = urllib.parse.urlsplit(url)
        host = (parts.hostname or "").rstrip(".")
    except ValueError:
        # urlsplit raises on malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False
    if parts.scheme not in ("http", "https"):
        return False
    return any(
        host == domain or host.endswith("." + domain)
        for domain in _ALLOWED_HOSTS
    )


def _require_x_url(url: str) -> None:
    """Raise a 400 ``HTTPException`` unless *url* points at X/Twitter."""
    if not _is_x_url(url):
        raise HTTPException(400, "Sadece X.com URL'leri kabul edilir")


async def _fetch_page(url: str, cookies: Optional[str] = None):
    """Fetch *url* with the stealthy fetcher in a worker thread.

    ``fetch`` is called via ``asyncio.to_thread`` so the event loop is not
    blocked while the page loads. When *cookies* is given it is sent as the
    ``Cookie`` request header.
    """
    fetcher = get_fetcher(cookies)
    headers = {"Cookie": cookies} if cookies else {}
    return await asyncio.to_thread(
        fetcher.fetch,
        url,
        headless=True,
        network_idle=True,
        extra_headers=headers,
    )


def _parse_tweet(page, url: str) -> dict:
    """Build the tweet payload from a fetched tweet page.

    NOTE(review): the selector results are used as single elements
    (``get_text`` / ``attrib``); confirm this matches the installed
    Scrapling version's return type for ``css()``.
    """
    text_el = page.css('[data-testid="tweetText"]')
    user_el = page.css('[data-testid="User-Name"]')
    time_el = page.css("time")
    # Interaction counts are read from the buttons' aria-label attribute.
    like_el = page.css('[data-testid="like"]')
    retweet_el = page.css('[data-testid="retweet"]')

    return {
        "text": text_el.get_text("\n") if text_el else "",
        "user": user_el.get_text() if user_el else "",
        "timestamp": time_el.attrib.get("datetime", "") if time_el else "",
        "likes": like_el.attrib.get("aria-label", "0") if like_el else "0",
        "retweets": (
            retweet_el.attrib.get("aria-label", "0") if retweet_el else "0"
        ),
        # Media
        "images": [
            img.attrib.get("src", "")
            for img in page.css('[data-testid="tweetPhoto"] img')
        ],
        "has_video": bool(page.css('[data-testid="videoPlayer"]')),
        "url": url,
        "source": "scrapling",
    }


def _parse_timeline(page) -> list:
    """Collect the tweets listed on a profile or search page.

    Articles without a ``/status/<id>`` link are skipped. An article that
    fails to parse is logged and skipped so one bad entry does not discard
    the rest of the page.
    """
    tweets = []
    for article in page.css('article[data-testid="tweet"]'):
        try:
            link = article.css('a[href*="/status/"]')
            if not link:
                continue
            href = link.attrib.get("href", "")
            match = _STATUS_ID_RE.search(href)
            if not match:
                continue

            text_el = article.css('[data-testid="tweetText"]')
            time_el = article.css("time")
            tweets.append(
                {
                    "id": match.group(1),
                    "text": text_el.get_text() if text_el else "",
                    "timestamp": (
                        time_el.attrib.get("datetime", "") if time_el else ""
                    ),
                    # urljoin keeps absolute hrefs intact and resolves
                    # relative ones against x.com.
                    "url": urllib.parse.urljoin(_X_BASE_URL, href),
                }
            )
        except Exception:
            logger.debug("Skipping unparsable tweet article", exc_info=True)
            continue
    return tweets


async def _scrape_timeline(url: str, cookies: Optional[str] = None) -> dict:
    """Fetch *url* and return the tweet-list response body.

    Raises:
        HTTPException: 500 when fetching or parsing fails.
    """
    try:
        page = await _fetch_page(url, cookies)
        tweets = _parse_timeline(page)
    except Exception as exc:
        raise HTTPException(500, f"Scrape hatası: {str(exc)}") from exc
    return {"success": True, "count": len(tweets), "data": tweets}


# --- Endpoints ---

@app.get("/")
def root():
    """Return basic service information."""
    return {"status": "ok", "service": "X Scraper API", "version": "1.0.0"}


@app.get("/health")
def health():
    """Return a static health-check payload."""
    return {"status": "healthy"}


@app.post("/scrape/tweet")
async def scrape_tweet(req: ScrapeRequest):
    """Scrape a single tweet URL.

    Raises:
        HTTPException: 400 when the URL is not on X/Twitter, 500 when
            fetching or parsing fails.
    """
    _require_x_url(req.url)
    try:
        page = await _fetch_page(req.url, req.cookies)
        tweet_data = _parse_tweet(page, req.url)
    except Exception as exc:
        raise HTTPException(500, f"Scrape hatası: {str(exc)}") from exc
    return {"success": True, "data": tweet_data}


@app.post("/scrape/profile")
async def scrape_profile(req: ScrapeRequest):
    """Fetch the tweets listed on a profile page.

    Raises:
        HTTPException: 400 when the URL is not on X/Twitter, 500 when
            fetching or parsing fails.
    """
    _require_x_url(req.url)
    return await _scrape_timeline(req.url, req.cookies)


@app.post("/scrape/search")
async def scrape_search(req: SearchRequest):
    """Scrape the live search results for a query.

    Raises:
        HTTPException: 500 when fetching or parsing fails.
    """
    query_encoded = urllib.parse.quote(req.query)
    url = f"https://x.com/search?q={query_encoded}&src=typed_query&f=live"
    return await _scrape_timeline(url, req.cookies)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)