import asyncio
import logging
import os
import re
from typing import Optional
from urllib.parse import urljoin, urlsplit

import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
|
|
# ASGI application object; every route below is registered on it.
app = FastAPI(version="1.0.0", title="X Scraper API")

# CORS is fully open: any origin, HTTP method and request header is accepted.
# NOTE(review): confirm that wildcard CORS is intended for the deployed service.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
|
|
| |
class ScrapeRequest(BaseModel):
    # Request body shared by the URL-based scrape endpoints.
    # Documented with comments instead of a docstring because pydantic
    # publishes a model docstring as the schema description.

    # Address of the page to scrape.
    url: str

    # Optional raw cookie string; the scrape endpoints send it as the
    # "Cookie" request header when it is non-empty.
    cookies: Optional[str] = None
|
|
class SearchRequest(BaseModel):
    # Request body of the search endpoint.
    # Documented with comments instead of a docstring because pydantic
    # publishes a model docstring as the schema description.

    # Free-text search query; it is URL-quoted before being placed in the
    # search URL.
    query: str

    # Optional raw cookie string, passed through to the page fetch.
    cookies: Optional[str] = None
|
|
| |
def get_fetcher(cookies: Optional[str] = None):
    """Return the Scrapling ``StealthyFetcher`` class.

    Scrapling is imported at call time, so the package is only loaded when a
    fetcher is actually requested.

    Args:
        cookies: Accepted for interface compatibility; the current
            implementation does not use it.

    Returns:
        The ``StealthyFetcher`` class itself (not an instance).
    """
    from scrapling import fetchers

    return fetchers.StealthyFetcher
|
|
| |
|
|
@app.get("/")
def root():
    # Service banner: fixed status, service name and version.
    # Kept as a comment (not a docstring) so the generated API description
    # is unchanged.
    banner = {
        "status": "ok",
        "service": "X Scraper API",
        "version": "1.0.0",
    }
    return banner
|
|
@app.get("/health")
def health():
    # Liveness probe: always reports "healthy" without checking anything.
    # Kept as a comment (not a docstring) so the generated API description
    # is unchanged.
    payload = {"status": "healthy"}
    return payload
|
|
@app.post("/scrape/tweet")
async def scrape_tweet(req: ScrapeRequest):
    """Scrape a single tweet URL.

    Fetches the page with Scrapling's ``StealthyFetcher`` in a worker thread
    and returns ``{"success": True, "data": {...}}`` with the tweet text,
    user, timestamp, like/retweet labels, image URLs and a video flag.

    Raises:
        HTTPException: 400 when the URL is not an http(s) URL on x.com or
            twitter.com (or a subdomain); 500 when fetching or parsing fails.
    """
    # Validate the parsed hostname rather than searching the raw string: a
    # substring test also accepts e.g. "https://evil.example/?x.com", which
    # would let callers make this server fetch arbitrary sites.
    try:
        target = urlsplit(req.url)
        scheme = target.scheme
        host = (target.hostname or "").lower()
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal): treat as not allowed.
        scheme, host = "", ""
    on_x = host in ("x.com", "twitter.com") or host.endswith(
        (".x.com", ".twitter.com")
    )
    if scheme not in ("http", "https") or not on_x:
        raise HTTPException(400, "Sadece X.com URL'leri kabul edilir")

    try:
        # Imported lazily so a missing/broken Scrapling install surfaces as a
        # 500 on this endpoint instead of preventing the app from starting.
        from scrapling.fetchers import StealthyFetcher

        headers = {"Cookie": req.cookies} if req.cookies else {}

        # The fetch is blocking; run it off the event loop.
        page = await asyncio.to_thread(
            StealthyFetcher.fetch,
            req.url,
            headless=True,
            network_idle=True,
            extra_headers=headers,
        )

        # NOTE(review): the selector calls below are kept exactly as they
        # were; confirm that the object returned by ``page.css(...)`` exposes
        # ``get_text`` / ``attrib`` in the installed Scrapling version.
        text_el = page.css('[data-testid="tweetText"]')
        user_el = page.css('[data-testid="User-Name"]')
        time_el = page.css("time")
        like_el = page.css('[data-testid="like"]')
        retweet_el = page.css('[data-testid="retweet"]')

        tweet_data = {
            "text": text_el.get_text("\n") if text_el else "",
            "user": user_el.get_text() if user_el else "",
            "timestamp": time_el.attrib.get("datetime", "") if time_el else "",
            # Counts are reported as the buttons' aria-label text, not numbers.
            "likes": like_el.attrib.get("aria-label", "0") if like_el else "0",
            "retweets": (
                retweet_el.attrib.get("aria-label", "0") if retweet_el else "0"
            ),
            "images": [
                img.attrib.get("src", "")
                for img in page.css('[data-testid="tweetPhoto"] img')
            ],
            "has_video": bool(page.css('[data-testid="videoPlayer"]')),
            "url": req.url,
            "source": "scrapling",
        }

        return {"success": True, "data": tweet_data}

    except Exception as e:
        # Top-level boundary: report any fetch/parse failure as a 500 and
        # keep the original exception as the cause for server-side tracebacks.
        raise HTTPException(500, f"Scrape hatası: {e}") from e
|
|
|
|
# Extracts the numeric tweet id from a ".../status/<id>" link; compiled once
# instead of importing ``re`` and re-resolving the pattern per article.
_STATUS_ID_RE = re.compile(r"/status/(\d+)")

_profile_log = logging.getLogger(__name__)


@app.post("/scrape/profile")
async def scrape_profile(req: ScrapeRequest):
    """Scrape the tweets listed on a profile page.

    Fetches the page with Scrapling's ``StealthyFetcher`` in a worker thread
    and returns ``{"success": True, "count": n, "data": [...]}`` where each
    item has the tweet ``id``, ``text``, ``timestamp`` and ``url``.

    Raises:
        HTTPException: 400 when the URL is not an http(s) URL on x.com or
            twitter.com (or a subdomain); 500 when fetching or parsing fails.
    """
    # Validate the parsed hostname rather than searching the raw string: a
    # substring test also accepts e.g. "https://evil.example/?x.com", which
    # would let callers make this server fetch arbitrary sites.
    try:
        target = urlsplit(req.url)
        scheme = target.scheme
        host = (target.hostname or "").lower()
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal): treat as not allowed.
        scheme, host = "", ""
    on_x = host in ("x.com", "twitter.com") or host.endswith(
        (".x.com", ".twitter.com")
    )
    if scheme not in ("http", "https") or not on_x:
        raise HTTPException(400, "Sadece X.com URL'leri kabul edilir")

    try:
        # Imported lazily so a missing/broken Scrapling install surfaces as a
        # 500 on this endpoint instead of preventing the app from starting.
        from scrapling.fetchers import StealthyFetcher

        headers = {"Cookie": req.cookies} if req.cookies else {}

        # The fetch is blocking; run it off the event loop.
        page = await asyncio.to_thread(
            StealthyFetcher.fetch,
            req.url,
            headless=True,
            network_idle=True,
            extra_headers=headers,
        )

        tweets = []
        # NOTE(review): the selector calls below are kept exactly as they
        # were; confirm that the object returned by ``.css(...)`` exposes
        # ``get_text`` / ``attrib`` in the installed Scrapling version.
        for article in page.css('article[data-testid="tweet"]'):
            try:
                link = article.css('a[href*="/status/"]')
                if not link:
                    continue

                href = link.attrib.get("href", "")
                match = _STATUS_ID_RE.search(href)
                if not match:
                    continue

                text_el = article.css('[data-testid="tweetText"]')
                time_el = article.css("time")

                tweets.append(
                    {
                        "id": match.group(1),
                        "text": text_el.get_text() if text_el else "",
                        "timestamp": (
                            time_el.attrib.get("datetime", "") if time_el else ""
                        ),
                        # urljoin yields the same URL for "/user/status/1"
                        # and also copes with absolute or slash-less hrefs.
                        "url": urljoin("https://x.com", href),
                    }
                )
            except Exception:
                # Best effort: one malformed article must not fail the whole
                # page, but leave a trace instead of dropping it silently.
                _profile_log.debug(
                    "Skipping tweet article that could not be parsed",
                    exc_info=True,
                )
                continue

        return {"success": True, "count": len(tweets), "data": tweets}

    except Exception as e:
        # Top-level boundary: report any fetch/parse failure as a 500 and
        # keep the original exception as the cause for server-side tracebacks.
        raise HTTPException(500, f"Scrape hatası: {e}") from e
|
|
|
|
@app.post("/scrape/search")
async def scrape_search(req: SearchRequest):
    """Scrape the search results for a query."""
    from urllib.parse import quote

    # Build the search URL (``f=live`` is part of the fixed query string) and
    # delegate to the profile scraper, which parses the same tweet articles.
    search_url = (
        "https://x.com/search?q="
        + quote(req.query)
        + "&src=typed_query&f=live"
    )
    delegated = ScrapeRequest(url=search_url, cookies=req.cookies)
    result = await scrape_profile(delegated)
    return result
|
|
|
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 7860.
    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )
|
|