# x-scraper / app.py
# ziffir's picture
# Upload 4 files
# 7cb594e verified
import os
import asyncio
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional
import uvicorn
app = FastAPI(title="X Scraper API", version="1.0.0")
# Allow requests coming from the extension: the CORS middleware is configured
# to accept every origin, every HTTP method, and every request header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- Models ---
class ScrapeRequest(BaseModel):
    """Payload accepted by the ``/scrape/tweet`` and ``/scrape/profile`` endpoints."""

    url: str  # address of the page to scrape
    cookies: Optional[str] = None  # X login cookie (optional); sent as the Cookie header
class SearchRequest(BaseModel):
    """Payload accepted by the ``/scrape/search`` endpoint."""

    query: str  # search text; URL-encoded into the X search URL
    cookies: Optional[str] = None  # optional cookie string, passed on with the search request
# --- Scrapling lazy import (heavy library) ---
def get_fetcher(cookies: Optional[str] = None):
    """Return Scrapling's ``StealthyFetcher`` class, importing it on demand.

    The import is done inside the function so the library is only loaded
    when this function is called. ``cookies`` is accepted but not used.
    """
    from scrapling.fetchers import StealthyFetcher as fetcher_cls

    return fetcher_cls
# --- Endpoints ---
@app.get("/")
def root():
    """Report the service name, its version, and an ``ok`` status."""
    service_info = dict(status="ok", service="X Scraper API", version="1.0.0")
    return service_info
@app.get("/health")
def health():
    """Health check: always answers with a ``healthy`` status."""
    payload = {"status": "healthy"}
    return payload
@app.post("/scrape/tweet")
async def scrape_tweet(req: ScrapeRequest):
    """Scrape a single tweet URL.

    Args:
        req: Request carrying the tweet URL and an optional cookie string,
            which is forwarded to the fetcher as the ``Cookie`` header.

    Returns:
        ``{"success": True, "data": {...}}`` where ``data`` holds ``text``,
        ``user``, ``timestamp``, ``likes``, ``retweets`` (the raw
        ``aria-label`` values, ``"0"`` when missing), ``images``,
        ``has_video``, ``url`` and ``source``.

    Raises:
        HTTPException: 400 when the URL is not an http(s) URL hosted on
            x.com or twitter.com (or a subdomain of either); 500 when
            fetching or parsing fails.
    """
    from urllib.parse import urlsplit

    # Validate the parsed hostname instead of searching the raw string, so a
    # URL that merely contains "x.com" somewhere (path, query, userinfo)
    # cannot be used to make the server fetch an arbitrary host.
    try:
        parsed = urlsplit(req.url)
        scheme, host = parsed.scheme, (parsed.hostname or "").lower()
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. broken IPv6 brackets).
        scheme, host = "", ""
    on_x = any(
        host == domain or host.endswith("." + domain)
        for domain in ("x.com", "twitter.com")
    )
    if scheme not in ("http", "https") or not on_x:
        raise HTTPException(400, "Sadece X.com URL'leri kabul edilir")

    try:
        from scrapling.fetchers import StealthyFetcher

        headers = {"Cookie": req.cookies} if req.cookies else {}

        # Run the fetch in a worker thread so the event loop is not held up
        # while the page loads.
        page = await asyncio.to_thread(
            StealthyFetcher.fetch,
            req.url,
            headless=True,
            network_idle=True,
            extra_headers=headers,
        )

        # NOTE(review): the lookups below call .get_text() / .attrib directly
        # on whatever page.css() returns. Confirm against the installed
        # Scrapling version that css() yields an object supporting both.

        # Text
        text_el = page.css('[data-testid="tweetText"]')
        # User
        user_el = page.css('[data-testid="User-Name"]')
        # Time
        time_el = page.css("time")
        # Engagement counters are taken verbatim from the aria-label attribute.
        like_el = page.css('[data-testid="like"]')
        retweet_el = page.css('[data-testid="retweet"]')

        tweet_data = {
            "text": text_el.get_text("\n") if text_el else "",
            "user": user_el.get_text() if user_el else "",
            "timestamp": time_el.attrib.get("datetime", "") if time_el else "",
            "likes": like_el.attrib.get("aria-label", "0") if like_el else "0",
            "retweets": (
                retweet_el.attrib.get("aria-label", "0") if retweet_el else "0"
            ),
            # Media
            "images": [
                img.attrib.get("src", "")
                for img in page.css('[data-testid="tweetPhoto"] img')
            ],
            "has_video": bool(page.css('[data-testid="videoPlayer"]')),
            "url": req.url,
            "source": "scrapling",
        }
        return {"success": True, "data": tweet_data}
    except Exception as e:
        # Chain the original error so the traceback keeps the real cause.
        raise HTTPException(500, f"Scrape hatası: {e}") from e
@app.post("/scrape/profile")
async def scrape_profile(req: ScrapeRequest):
    """Collect the tweets listed on a profile page.

    Args:
        req: Request carrying the page URL and an optional cookie string,
            which is forwarded to the fetcher as the ``Cookie`` header.

    Returns:
        ``{"success": True, "count": n, "data": [...]}`` where each entry has
        ``id``, ``text``, ``timestamp`` and ``url``. Articles without a
        ``/status/<digits>`` link, or that fail to parse, are skipped.

    Raises:
        HTTPException: 400 when the URL is not an http(s) URL hosted on
            x.com or twitter.com (or a subdomain of either); 500 when
            fetching or parsing the page fails.
    """
    import logging
    import re
    from urllib.parse import urljoin, urlsplit

    # Validate the parsed hostname instead of searching the raw string, so a
    # URL that merely contains "x.com" somewhere (path, query, userinfo)
    # cannot be used to make the server fetch an arbitrary host.
    try:
        parsed = urlsplit(req.url)
        scheme, host = parsed.scheme, (parsed.hostname or "").lower()
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. broken IPv6 brackets).
        scheme, host = "", ""
    on_x = any(
        host == domain or host.endswith("." + domain)
        for domain in ("x.com", "twitter.com")
    )
    if scheme not in ("http", "https") or not on_x:
        raise HTTPException(400, "Sadece X.com URL'leri kabul edilir")

    log = logging.getLogger(__name__)
    status_id = re.compile(r"/status/(\d+)")  # compiled once, reused per article

    try:
        from scrapling.fetchers import StealthyFetcher

        headers = {"Cookie": req.cookies} if req.cookies else {}

        # Run the fetch in a worker thread so the event loop is not held up
        # while the page loads.
        page = await asyncio.to_thread(
            StealthyFetcher.fetch,
            req.url,
            headless=True,
            network_idle=True,
            extra_headers=headers,
        )

        # NOTE(review): the lookups below call .get_text() / .attrib directly
        # on whatever css() returns. Confirm against the installed Scrapling
        # version that css() yields an object supporting both.
        tweets = []
        for article in page.css('article[data-testid="tweet"]'):
            try:
                link = article.css('a[href*="/status/"]')
                if not link:
                    continue
                href = link.attrib.get("href", "")
                match = status_id.search(href)
                if not match:
                    continue
                text_el = article.css('[data-testid="tweetText"]')
                time_el = article.css("time")
                tweets.append(
                    {
                        "id": match.group(1),
                        "text": text_el.get_text() if text_el else "",
                        "timestamp": (
                            time_el.attrib.get("datetime", "") if time_el else ""
                        ),
                        # urljoin copes with relative and absolute hrefs alike.
                        "url": urljoin("https://x.com", href),
                    }
                )
            except Exception:
                # Best effort: one bad article must not fail the whole page,
                # but leave a trace so broken selectors do not go unnoticed.
                log.warning(
                    "Skipping tweet article that could not be parsed",
                    exc_info=True,
                )
                continue
        return {"success": True, "count": len(tweets), "data": tweets}
    except Exception as e:
        # Chain the original error so the traceback keeps the real cause.
        raise HTTPException(500, f"Scrape hatası: {e}") from e
@app.post("/scrape/search")
async def scrape_search(req: SearchRequest):
    """Scrape search results.

    Percent-encodes the query, places it in the X search URL, and hands that
    URL (with the caller's cookies) to ``scrape_profile``, whose response is
    returned unchanged.
    """
    from urllib.parse import quote

    encoded = quote(req.query)
    search_url = f"https://x.com/search?q={encoded}&src=typed_query&f=live"
    return await scrape_profile(
        ScrapeRequest(url=search_url, cookies=req.cookies)
    )
if __name__ == "__main__":
    # Serve on all interfaces. The port is read from the PORT environment
    # variable when it is set and non-empty; otherwise the previous fixed
    # value 7860 is used, so existing deployments behave as before.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT") or "7860"))