# 313 / data_fetcher.py
# ali3133's picture
# Upload 3 files
# a4f6aa2 verified
"""
جمع البيانات الحقيقية — نسخة مُصلحة ومُبسّطة
"""
import requests, json, time, hashlib, logging, threading, re
from datetime import datetime
from dataclasses import dataclass, asdict
from typing import List
from collections import deque
try:
import feedparser
except ImportError:
feedparser = None
try:
from bs4 import BeautifulSoup
except ImportError:
BeautifulSoup = None
# Root logging setup: timestamped messages at INFO level.
_LOG_FORMAT = '%(asctime)s %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module logger, registered under the short name "F".
log = logging.getLogger("F")

# Browser-like User-Agent header used for outbound HTTP requests.
UA = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}
# ═══ Models ═══
@dataclass
class Aircraft:
    """One aircraft position record.

    ``category`` defaults to ``"civil"``; the OpenSky fetcher in this file
    sets it to ``"military"`` when ``is_mil`` matches.
    """

    icao24: str
    callsign: str
    lat: float
    lng: float
    alt: float
    velocity: float
    heading: float
    category: str = "civil"

    def to_dict(self):
        """Return all fields as a plain ``dict`` (via ``dataclasses.asdict``)."""
        return asdict(self)
@dataclass
class Ship:
    """One vessel position record; ``source`` defaults to ``"AIS"``."""

    mmsi: str
    name: str
    lat: float
    lng: float
    course: float
    speed: float
    ship_type: str
    source: str = "AIS"

    def to_dict(self):
        """Return all fields as a plain ``dict`` (via ``dataclasses.asdict``)."""
        return asdict(self)
@dataclass
class Event:
    """One geolocated event record with its originating source."""

    id: str
    title: str
    lat: float
    lng: float
    event_type: str
    location: str
    source_name: str
    source_url: str
    date: str

    def to_dict(self):
        """Return all fields as a plain ``dict`` (via ``dataclasses.asdict``)."""
        return asdict(self)
@dataclass
class News:
    """One news headline; ``summary`` and ``is_military`` are optional."""

    title: str
    link: str
    source: str
    published: str
    summary: str = ""
    is_military: bool = False

    def to_dict(self):
        """Return all fields as a plain ``dict`` (via ``dataclasses.asdict``)."""
        return asdict(self)
@dataclass
class Camera:
    """One camera entry: identifier, map position, country name and URL."""

    id: str
    lat: float
    lng: float
    country: str
    url: str

    def to_dict(self):
        """Return all fields as a plain ``dict`` (via ``dataclasses.asdict``)."""
        return asdict(self)
# ═══ Helpers ═══
# Callsign prefixes that is_mil() treats as military.
# NOTE(review): some entries (e.g. "UAE") may also be civil operator codes,
# which would produce false positives — confirm the list.
MIL_PFX = [
    "FORTE", "JAKE", "VIPER", "REAP", "RCH",
    "HAWK", "DUKE", "NAVY", "ARMY", "EVAC",
    "REACH", "TOPCAT", "ATLAS", "GIANT", "IRON",
    "STEEL", "COBRA", "TIGER", "EAGLE", "DEMON",
    "GHOST", "SKULL", "DOOM", "FURY", "BOLT",
    "SABER", "LANCE", "TORCH", "GAF", "BAF",
    "FAF", "PLF", "PAF", "IAF", "QAF",
    "UAE", "RSF", "EGF", "ASCOT",
]

# Inclusive (low, high) ranges of ICAO 24-bit addresses that is_mil()
# treats as military.
MIL_HEX = [
    (0xADF7C0, 0xAFFFFF),
    (0x3F0000, 0x3FFFFF),
    (0x3C0000, 0x3C7FFF),
    (0x43C000, 0x43CFFF),
]

# Lower-case keywords (English, then Arabic) matched as substrings to flag
# a news item as military.
MIL_KW = [
    "military", "war", "conflict", "airstrike", "missile", "troops",
    "drone", "bomb", "explosion", "strike", "attack", "defense",
    "عسكري", "حرب", "غارة", "صاروخ", "قوات",
    "قصف", "هجوم", "انفجار", "معركة", "جيش",
]

# Place name (lower-case English or Arabic) -> approximate (lat, lng).
# Insertion order matters: geo_est() returns the first key found in the text.
GEO = {
    "syria": (35, 38),
    "iraq": (33.3, 44.4),
    "yemen": (15.4, 44.2),
    "gaza": (31.4, 34.4),
    "lebanon": (33.9, 35.5),
    "ukraine": (48.4, 37.8),
    "sudan": (15.6, 32.5),
    "iran": (32.4, 53.7),
    "israel": (31.8, 35.2),
    "libya": (32.9, 13.1),
    "red sea": (18, 40),
    "russia": (55.8, 37.6),
    "turkey": (39.9, 32.9),
    "egypt": (30, 31.2),
    "jordan": (31.9, 35.9),
    "saudi": (24.7, 46.7),
    "سوريا": (35, 38),
    "عراق": (33.3, 44.4),
    "يمن": (15.4, 44.2),
    "غزة": (31.4, 34.4),
    "لبنان": (33.9, 35.5),
    "أوكرانيا": (48.4, 37.8),
    "البحر الأحمر": (18, 40),
    "مصر": (30, 31.2),
}
def is_mil(icao, cs):
    """Heuristically decide whether an aircraft looks military.

    Args:
        icao: ICAO 24-bit address as a hex string; may be empty or ``None``.
        cs: Callsign; may be ``None`` and may carry surrounding whitespace.

    Returns:
        ``True`` when the upper-cased callsign starts with any ``MIL_PFX``
        entry, or when the address parses as hex and lies inside one of the
        inclusive ``MIL_HEX`` ranges; ``False`` otherwise.
    """
    callsign = (cs or "").strip().upper()
    # str.startswith accepts a tuple, so one call covers every prefix.
    if callsign.startswith(tuple(MIL_PFX)):
        return True
    try:
        address = int(icao, 16)
    except (TypeError, ValueError):
        # Missing or non-hex address: there is no range to test against.
        return False
    return any(lo <= address <= hi for lo, hi in MIL_HEX)
def geo_est(t):
    """Guess coordinates from place names mentioned in a text.

    The text is lower-cased and each ``GEO`` key is tested as a substring in
    the table's insertion order; the first key found wins.

    Returns:
        ``(lat, lng)`` of the first match, or ``(None, None)`` if no key
        occurs in the text.
    """
    lowered = t.lower()
    hit = next(
        (coords for place, coords in GEO.items() if place in lowered),
        None,
    )
    if hit is None:
        return None, None
    lat, lng = hit
    return lat, lng
def ev_type(t):
    """Classify a headline into an event type by keyword.

    The lower-cased text is checked against keyword groups in priority
    order; the label of the first group with a substring hit is returned.

    Returns:
        ``"explosion"``, ``"protest"``, ``"battle"`` or ``"strategic"``;
        ``"violence"`` when no keyword matches.
    """
    lowered = t.lower()
    # Order is significant: earlier rules win over later ones.
    rules = (
        ("explosion", ("airstrike", "bombing", "strike", "غارة", "قصف",
                       "explosion", "missile", "صاروخ")),
        ("protest", ("protest", "احتجاج")),
        ("battle", ("battle", "clash", "اشتباك", "معركة")),
        ("strategic", ("troops", "deploy", "عسكري", "قوات")),
    )
    for label, keywords in rules:
        for word in keywords:
            if word in lowered:
                return label
    return "violence"
# ═══ 1. OpenSky — Public API (NO AUTH NEEDED) ═══
def _opensky_state_to_aircraft(s):
    """Convert one OpenSky state vector to an ``Aircraft``; ``None`` if unusable.

    Indices follow the OpenSky state-vector layout: 0 icao24, 1 callsign,
    5 longitude, 6 latitude, 7 barometric altitude, 9 velocity,
    10 true track, 13 geometric altitude.

    Raises:
        TypeError, ValueError: if a numeric field cannot be converted.
    """
    if not s or len(s) < 8 or s[6] is None or s[5] is None:
        return None
    icao = s[0] or ""
    callsign = (s[1] or "").strip()
    # Only len(s) >= 8 is guaranteed, so guard every index above 7.
    velocity = s[9] if len(s) > 9 else None
    heading = s[10] if len(s) > 10 else None
    geo_alt = s[13] if len(s) > 13 else None
    return Aircraft(
        icao24=icao,
        callsign=callsign or icao,
        lat=float(s[6]),
        lng=float(s[5]),
        alt=float(s[7] or geo_alt or 0),
        velocity=float(velocity or 0),
        heading=float(heading or 0),
        category="military" if is_mil(icao, callsign) else "civil",
    )


def fetch_opensky() -> List[Aircraft]:
    """Fetch current aircraft states from the public OpenSky API.

    Queries two fixed bounding boxes (``"ME"`` and ``"EU"``) without
    authentication. Failures are logged and never raised; a failed zone
    simply contributes no aircraft.

    Returns:
        All aircraft parsed from both zones; empty if every request failed.
    """
    result = []
    zones = [
        (12, 42, 25, 63, "ME"),  # Middle East
        (44, 56, 22, 45, "EU"),  # East Europe
    ]
    for idx, (lamin, lamax, lomin, lomax, name) in enumerate(zones):
        if idx:
            # Pause BETWEEN requests (public API: 1 req per 5 sec). Sleeping
            # here also covers a failed previous zone and avoids a pointless
            # delay after the last one.
            time.sleep(5)
        try:
            log.info(f"[OpenSky] Fetching {name}...")
            r = requests.get(
                "https://opensky-network.org/api/states/all",
                params={"lamin": lamin, "lamax": lamax, "lomin": lomin, "lomax": lomax},
                headers=UA, timeout=30
            )
            log.info(f"[OpenSky] {name}: HTTP {r.status_code}")
            if r.status_code != 200:
                log.warning(f"[OpenSky] {name}: {r.status_code} - {r.text[:100]}")
                continue
            states = r.json().get("states") or []
            for s in states:
                try:
                    aircraft = _opensky_state_to_aircraft(s)
                except (TypeError, ValueError, IndexError) as e:
                    # One malformed vector must not discard the whole zone.
                    log.debug(f"[OpenSky] {name}: skipped malformed state: {e}")
                    continue
                if aircraft is not None:
                    result.append(aircraft)
            log.info(f"[OpenSky] {name}: {len(states)} aircraft")
        except Exception as e:
            log.error(f"[OpenSky] {name} ERROR: {e}")
    log.info(f"[OpenSky] Total: {len(result)} ({sum(1 for a in result if a.category=='military')} mil)")
    return result
# ═══ 2. Ships — DigiTraffic Public AIS (always works) ═══
def fetch_ships() -> List[Ship]:
    """Fetch vessel positions from the DigiTraffic vessel-location endpoint.

    At most the first 400 features are converted. The feed used here carries
    no name or type, so every record gets the placeholder name ``"Vessel"``
    and type ``"cargo"``. Failures are logged and never raised.

    Returns:
        The vessels parsed so far; empty if the request failed.
    """
    ships = []
    try:
        log.info("[Ships] Fetching DigiTraffic...")
        r = requests.get("https://meri.digitraffic.fi/api/vessel-location/v1/locations",
                         headers=UA, timeout=20)
        log.info(f"[Ships] HTTP {r.status_code}")
        if r.status_code == 200:
            features = r.json().get("features") or []
            for f in features[:400]:
                try:
                    # `or` also covers keys that are present but null.
                    p = f.get("properties") or {}
                    c = (f.get("geometry") or {}).get("coordinates") or []
                    if len(c) < 2:
                        continue
                    ships.append(Ship(
                        mmsi=str(p.get("mmsi", "")), name="Vessel",
                        lat=float(c[1]), lng=float(c[0]),
                        course=float(p.get("cog") or 0),
                        speed=float(p.get("sog") or 0),
                        ship_type="cargo", source="DigiTraffic",
                    ))
                except (AttributeError, TypeError, ValueError) as e:
                    # Skip one bad feature instead of dropping all that follow.
                    log.debug(f"[Ships] skipped malformed feature: {e}")
        log.info(f"[Ships] {len(ships)} vessels")
    except Exception as e:
        log.error(f"[Ships] ERROR: {e}")
    return ships
# ═══ 3. GDELT — Conflict Events ═══
def _gdelt_event_id(key):
    """Return a short id: the first 10 hex chars of the MD5 of *key*."""
    return hashlib.md5(key.encode()).hexdigest()[:10]


def _gdelt_article_to_event(a, seen):
    """Convert one GDELT article dict to an ``Event``.

    Returns ``None`` for articles without a title or whose first 50 title
    characters are already in *seen*. Otherwise records the key in *seen*.
    Coordinates come from the article if present, else from ``geo_est``,
    else fall back to ``(33, 44)``.
    """
    title = a.get("title", "")
    key = title[:50]
    if not title or key in seen:
        return None
    seen.add(key)
    la = a.get("latitude")
    lo = a.get("longitude")
    if la is None or lo is None:
        la, lo = geo_est(title)
    return Event(
        id=_gdelt_event_id(key),
        title=title[:200], lat=float(la or 33), lng=float(lo or 44),
        event_type=ev_type(title), location=a.get("sourcecountry", ""),
        source_name=a.get("domain", "GDELT"),
        source_url=a.get("url", ""),
        date=a.get("seendate", ""),
    )


def fetch_gdelt() -> List[Event]:
    """Collect events from the GDELT V2 DOC and GEO APIs.

    Runs one English and one Arabic article query, then one GeoJSON query.
    Items are de-duplicated across all three on the first 50 characters of
    their title, and the event id is derived from that same key. Failures
    are logged and never raised.

    Returns:
        The events gathered from all requests; empty if every request failed.
    """
    events = []
    seen = set()
    queries = [
        ("conflict OR military OR airstrike OR missile OR bombing", "eng"),
        ("غارة OR صواريخ OR عسكري OR قصف", "ara"),
    ]
    for q, lang in queries:
        try:
            log.info(f"[GDELT] Fetching '{q[:30]}'...")
            r = requests.get("https://api.gdeltproject.org/api/v2/doc/doc", params={
                "query": q, "mode": "artlist", "maxrecords": "40",
                "format": "json", "timespan": "24h", "sourcelang": lang,
            }, headers=UA, timeout=25)
            log.info(f"[GDELT] HTTP {r.status_code}, len={len(r.text)}")
            if r.status_code == 200 and r.text.strip():
                txt = r.text.strip()
                # GDELT sometimes returns non-JSON
                if not txt.startswith('{'):
                    log.warning(f"[GDELT] Not JSON: {txt[:80]}")
                    continue
                arts = json.loads(txt).get("articles", [])
                for a in arts:
                    try:
                        event = _gdelt_article_to_event(a, seen)
                    except (AttributeError, TypeError, ValueError) as e:
                        # One bad article must not abort the whole query.
                        log.debug(f"[GDELT] skipped malformed article: {e}")
                        continue
                    if event is not None:
                        events.append(event)
                log.info(f"[GDELT] '{q[:20]}': {len(arts)} articles")
        except Exception as e:
            log.error(f"[GDELT] ERROR: {e}")
        finally:
            # Pace requests on every path, including `continue` and errors.
            time.sleep(3)
    # GeoJSON endpoint
    try:
        log.info("[GDELT] Fetching geo...")
        r = requests.get("https://api.gdeltproject.org/api/v2/geo/geo", params={
            "query": "military OR conflict OR airstrike",
            "format": "geojson", "timespan": "24h", "maxpoints": "80",
        }, headers=UA, timeout=25)
        if r.status_code == 200 and r.text.strip().startswith('{'):
            for f in json.loads(r.text).get("features", []):
                try:
                    c = (f.get("geometry") or {}).get("coordinates") or []
                    if len(c) < 2:
                        continue
                    p = f.get("properties") or {}
                    nm = str(p.get("name", p.get("html", "Event")))
                    # Strip markup BEFORE truncating so a tag is never cut
                    # in half and left in the title.
                    if BeautifulSoup:
                        nm = BeautifulSoup(nm, "html.parser").get_text()
                    nm = nm[:200]
                    key = nm[:50]
                    if key in seen:
                        continue
                    seen.add(key)
                    # Hash the dedup key itself so distinct points cannot
                    # share an id.
                    events.append(Event(
                        id=_gdelt_event_id(key),
                        title=nm, lat=float(c[1]), lng=float(c[0]),
                        event_type=ev_type(nm),
                        location="", source_name="GDELT", source_url="",
                        date=datetime.now().strftime("%Y-%m-%d"),
                    ))
                except (AttributeError, TypeError, ValueError) as e:
                    log.debug(f"[GDELT] skipped malformed geo feature: {e}")
        log.info("[GDELT] Geo: added points")
    except Exception as e:
        log.error(f"[GDELT] Geo ERROR: {e}")
    log.info(f"[GDELT] Total: {len(events)} events")
    return events
# ═══ 4. RSS News ═══
# Display name -> feed URL for every source polled by fetch_rss().
# The key is stored verbatim as the `source` field of each News item.
RSS = {
"الجزيرة":"https://www.aljazeera.net/aljazeerarss/a7c186be-1baa-4bd4-9d80-a84db769f779/73d0e1b4-532f-45ef-b135-bfdff8b8cab9",
"الجزيرة عاجل":"https://www.aljazeera.net/aljazeerarss/a7c186be-1baa-4bd4-9d80-a84db769f779/ce3b4f1b-3bc9-4c5f-9894-c1a07201fa7d",
"BBC عربي":"https://feeds.bbci.co.uk/arabic/rss.xml",
"سكاي نيوز":"https://www.skynewsarabia.com/web/rss",
"العربية":"https://www.alarabiya.net/.mrss/ar.xml",
"RT عربي":"https://arabic.rt.com/rss/",
"France24":"https://www.france24.com/ar/rss",
"Reuters":"https://feeds.reuters.com/Reuters/worldNews",
"BBC World":"https://feeds.bbci.co.uk/news/world/rss.xml",
"BBC ME":"https://feeds.bbci.co.uk/news/world/middle_east/rss.xml",
"AJ EN":"https://www.aljazeera.com/xml/rss/all.xml",
"BBC Europe":"https://feeds.bbci.co.uk/news/world/europe/rss.xml",
"Defense News":"https://www.defensenews.com/arc/outboundfeeds/rss/",
}
def fetch_rss() -> List[News]:
    """Collect recent headlines from every feed in ``RSS``.

    Takes up to 8 entries per feed. An entry is flagged ``is_military`` when
    any ``MIL_KW`` keyword occurs as a substring of its lower-cased title
    plus summary. A failing feed is logged and skipped.

    Returns:
        News items with military-flagged ones first (stable order otherwise);
        an empty list when ``feedparser`` is not installed.
    """
    if not feedparser:
        log.error("[RSS] feedparser not installed!")
        return []
    items = []
    for name, url in RSS.items():
        try:
            # feedparser.parse(url) downloads without a timeout, so a single
            # stalled feed would block the whole refresh. Download with a
            # bounded request and hand the bytes to feedparser instead.
            resp = requests.get(url, headers=UA, timeout=20)
            resp.raise_for_status()
            fd = feedparser.parse(resp.content)
            for entry in fd.entries[:8]:
                t = entry.get("title", "")
                s = entry.get("summary", entry.get("description", ""))
                if not t:
                    continue
                combined = f"{t} {s}".lower()
                summary = ""
                if s and BeautifulSoup:
                    summary = BeautifulSoup(s, "html.parser").get_text()[:200]
                elif s:
                    # Fallback tag stripper when BeautifulSoup is unavailable.
                    summary = re.sub('<[^>]+>', '', s)[:200]
                items.append(News(
                    title=t[:300], link=entry.get("link", ""), source=name,
                    published=entry.get("published", entry.get("updated", "")),
                    summary=summary,
                    is_military=any(k in combined for k in MIL_KW),
                ))
        except Exception as e:
            log.warning(f"[RSS] {name}: {e}")
    # False sorts before True, so military items come first.
    items.sort(key=lambda x: (not x.is_military))
    log.info(f"[RSS] {len(items)} news ({sum(1 for i in items if i.is_military)} military)")
    return items
# ═══ 5. Cameras ═══
# Country code -> (display name, base latitude, base longitude), used by
# fetch_cameras() to build the listing URL and place its markers.
COUNTRIES = {
    "IL": ("Israel", 31.8, 35.2),
    "UA": ("Ukraine", 48.4, 37.8),
    "TR": ("Turkey", 39.9, 32.9),
    "RU": ("Russia", 55.8, 37.6),
    "IQ": ("Iraq", 33.3, 44.4),
    "EG": ("Egypt", 30, 31.2),
}
def fetch_cameras() -> List[Camera]:
    """Scrape up to 5 camera thumbnails per country in ``COUNTRIES``.

    Marker positions are synthetic: the country's base coordinates offset by
    ``0.03`` degrees per thumbnail index, not the camera's real location.
    A failing country is logged and skipped.

    NOTE(review): this scrapes a third-party camera directory over plain
    HTTP — confirm that use of this source is permitted and appropriate.

    Returns:
        The cameras found; empty when BeautifulSoup is not installed or
        every request failed.
    """
    cams = []
    if not BeautifulSoup:
        # Nothing can be parsed without it, so skip the network calls.
        log.warning("[Cam] BeautifulSoup not installed; skipping cameras")
        log.info(f"[Cam] {len(cams)} cameras")
        return cams
    for code, (name, dlat, dlng) in COUNTRIES.items():
        try:
            r = requests.get(f"http://www.insecam.org/en/bycountry/{code}/",
                             headers={**UA, "Referer": "http://www.insecam.org/"}, timeout=15)
            if r.status_code != 200:
                log.warning(f"[Cam] {code}: HTTP {r.status_code}")
                continue
            thumbs = BeautifulSoup(r.text, "html.parser").select("img.thumbnail-item__img")[:5]
            for i, img in enumerate(thumbs):
                src = img.get("src", "")
                if src:
                    cams.append(Camera(id=f"c_{code}_{i}", lat=dlat + (i * 0.03),
                                       lng=dlng + (i * 0.03), country=name, url=src))
        except Exception as e:
            # Best-effort source: record the failure instead of hiding it.
            log.warning(f"[Cam] {code}: {e}")
    log.info(f"[Cam] {len(cams)} cameras")
    return cams
# ═══ Central Manager ═══
class DataManager:
    """In-memory holder for the latest data from every source.

    ``fetch_all`` refreshes each source in turn and ``to_json`` serialises
    the current snapshot. The data lists are replaced, and read by
    ``to_json``, while holding ``self.lock``.
    """

    def __init__(self):
        self.aircraft = []
        self.ships = []
        self.events = []
        self.news = []
        self.cameras = []
        # Source name -> {"s": "online" | "error", "c": item count}.
        self.status = {}
        # time.time() of the last completed refresh; 0 before the first.
        self.last_update = 0
        self.is_fetching = False
        self.lock = threading.Lock()
        # Rolling buffer of the last 100 timestamped log lines.
        self.logs = deque(maxlen=100)

    def _log(self, msg):
        """Store *msg* with an HH:MM:SS prefix in ``self.logs`` and log it."""
        self.logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
        log.info(msg)

    def _merge_events(self, ev):
        """Replace ``self.events`` with *ev* plus still-fresh older events.

        An older event is kept only if it has a ``timestamp`` attribute less
        than 24 hours old and its id is not among the new events. Events
        without a ``timestamp`` are dropped.
        """
        now = time.time()
        with self.lock:
            ids = {e.id for e in ev}
            # Check per item and tolerate an empty list: the previous code
            # indexed self.events[0] before testing for emptiness, so the
            # first refresh raised IndexError and events were never stored.
            old = [
                c for c in self.events
                if hasattr(c, "timestamp")
                and now - c.timestamp < 86400
                and c.id not in ids
            ]
            self.events = ev + old

    def fetch_all(self):
        """Refresh every source in turn, recording status and log lines.

        Returns immediately if a refresh is already running. Each source is
        isolated: a failure marks that source as ``"error"`` and the others
        still run.
        """
        # Test-and-set under the lock so two callers cannot both start.
        with self.lock:
            if self.is_fetching:
                return
            self.is_fetching = True
        t0 = time.time()
        try:
            self._log("⏳ بدء التحديث من جميع المصادر...")
            # 1) OpenSky
            try:
                ac = fetch_opensky()
                with self.lock:
                    self.aircraft = ac
                mil = sum(1 for a in ac if a.category == "military")
                self.status["OpenSky"] = {"s": "online" if ac else "error", "c": len(ac)}
                self._log(f"✈️ OpenSky: {len(ac)} طائرة ({mil} عسكري)")
            except Exception as e:
                self.status["OpenSky"] = {"s": "error", "c": 0}
                self._log(f"❌ OpenSky: {e}")
            # 2) Ships
            try:
                sh = fetch_ships()
                with self.lock:
                    self.ships = sh
                self.status["Ships"] = {"s": "online" if sh else "error", "c": len(sh)}
                self._log(f"🚢 Ships: {len(sh)} سفينة")
            except Exception as e:
                self.status["Ships"] = {"s": "error", "c": 0}
                self._log(f"❌ Ships: {e}")
            # 3) GDELT
            try:
                ev = fetch_gdelt()
                self._merge_events(ev)
                self.status["GDELT"] = {"s": "online" if ev else "error", "c": len(self.events)}
                self._log(f"⚔️ GDELT: {len(ev)} حدث نزاع")
            except Exception as e:
                self.status["GDELT"] = {"s": "error", "c": 0}
                self._log(f"❌ GDELT: {e}")
            # 4) RSS
            try:
                nw = fetch_rss()
                with self.lock:
                    self.news = nw
                self.status["RSS"] = {"s": "online" if nw else "error", "c": len(nw)}
                self._log(f"📰 RSS: {len(nw)} خبر")
            except Exception as e:
                self.status["RSS"] = {"s": "error", "c": 0}
                self._log(f"❌ RSS: {e}")
            # 5) Cameras
            try:
                cm = fetch_cameras()
                with self.lock:
                    # Keep the previous list when this scrape found nothing.
                    if cm:
                        self.cameras = cm
                self.status["Cameras"] = {"s": "online" if cm else "error", "c": len(self.cameras)}
                self._log(f"📷 Cameras: {len(cm)} كاميرا")
            except Exception as e:
                self.status["Cameras"] = {"s": "error", "c": 0}
                self._log(f"❌ Cameras: {e}")
            self.last_update = time.time()
        finally:
            # Always clear the flag so an unexpected error cannot block
            # every later refresh.
            self.is_fetching = False
        self._log(f"✅ اكتمل في {time.time()-t0:.1f} ثانية")

    def to_json(self):
        """Serialise the current snapshot to a JSON string.

        Output is capped at 200 military and 500 civil aircraft, 400 ships,
        200 events, 100 news items and 50 cameras. ``aircraft.total`` is the
        uncapped count. Non-ASCII text is emitted as-is.
        """
        with self.lock:
            mil = [a.to_dict() for a in self.aircraft if a.category == "military"]
            civ = [a.to_dict() for a in self.aircraft if a.category != "military"]
            payload = {
                "aircraft": {"military": mil[:200], "civil": civ[:500], "total": len(self.aircraft)},
                "ships": [s.to_dict() for s in self.ships[:400]],
                "conflicts": [e.to_dict() for e in self.events[:200]],
                "news": [n.to_dict() for n in self.news[:100]],
                "cameras": [c.to_dict() for c in self.cameras[:50]],
                "meta": {"update_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
            }
            return json.dumps(payload, ensure_ascii=False)