Spaces:

ali3133
/

313

Sleeping

App Files Files Community

313 / data_fetcher.py

ali3133

Upload 3 files

a4f6aa2 verified about 2 months ago

raw

history blame contribute delete

16.5 kB

	"""
	جمع البيانات الحقيقية — نسخة مُصلحة ومُبسّطة
	"""
	import requests, json, time, hashlib, logging, threading, re
	from datetime import datetime
	from dataclasses import dataclass, asdict
	from typing import List
	from collections import deque

	# Optional third-party parsers. Each name is bound to None when the package
	# cannot be imported, so the fetchers below can feature-test it.
	try:
	    import feedparser
	except ImportError:
	    feedparser = None

	try:
	    from bs4 import BeautifulSoup
	except ImportError:
	    BeautifulSoup = None

	# Root logging configuration, the module logger, and the browser-like
	# User-Agent header passed to the HTTP requests in this module.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s %(message)s',
	)
	log = logging.getLogger("F")
	UA = {
	    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
	}

	# ═══ Models ═══
	@dataclass
	class Aircraft:
	    """One aircraft position report; ``category`` defaults to "civil"."""

	    icao24: str
	    callsign: str
	    lat: float
	    lng: float
	    alt: float
	    velocity: float
	    heading: float
	    category: str = "civil"

	    def to_dict(self):
	        """Return all fields as a plain dict."""
	        return asdict(self)

	@dataclass
	class Ship:
	    """One vessel position report; ``source`` defaults to "AIS"."""

	    mmsi: str
	    name: str
	    lat: float
	    lng: float
	    course: float
	    speed: float
	    ship_type: str
	    source: str = "AIS"

	    def to_dict(self):
	        """Return all fields as a plain dict."""
	        return asdict(self)

	@dataclass
	class Event:
	    """One geolocated event record with its source attribution."""

	    id: str
	    title: str
	    lat: float
	    lng: float
	    event_type: str
	    location: str
	    source_name: str
	    source_url: str
	    date: str

	    def to_dict(self):
	        """Return all fields as a plain dict."""
	        return asdict(self)

	@dataclass
	class News:
	    """One news headline; ``summary`` and ``is_military`` are optional."""

	    title: str
	    link: str
	    source: str
	    published: str
	    summary: str = ""
	    is_military: bool = False

	    def to_dict(self):
	        """Return all fields as a plain dict."""
	        return asdict(self)

	@dataclass
	class Camera:
	    """One camera entry: identifier, map position, country and image URL."""

	    id: str
	    lat: float
	    lng: float
	    country: str
	    url: str

	    def to_dict(self):
	        """Return all fields as a plain dict."""
	        return asdict(self)

	# ═══ Helpers ═══
	# Callsign prefixes that is_mil() treats as military.
	MIL_PFX = [
	    "FORTE", "JAKE", "VIPER", "REAP", "RCH", "HAWK", "DUKE", "NAVY", "ARMY", "EVAC",
	    "REACH", "TOPCAT", "ATLAS", "GIANT", "IRON", "STEEL", "COBRA", "TIGER", "EAGLE",
	    "DEMON", "GHOST", "SKULL", "DOOM", "FURY", "BOLT", "SABER", "LANCE", "TORCH",
	    "GAF", "BAF", "FAF", "PLF", "PAF", "IAF", "QAF", "UAE", "RSF", "EGF", "ASCOT",
	]

	# Inclusive (low, high) ranges of hex aircraft addresses that is_mil()
	# treats as military.
	MIL_HEX = [
	    (0xADF7C0, 0xAFFFFF),
	    (0x3F0000, 0x3FFFFF),
	    (0x3C0000, 0x3C7FFF),
	    (0x43C000, 0x43CFFF),
	]

	# Keywords (English, then Arabic) whose presence in a lower-cased
	# title/summary marks a news item as military in fetch_rss().
	MIL_KW = [
	    "military", "war", "conflict", "airstrike", "missile", "troops",
	    "drone", "bomb", "explosion", "strike", "attack", "defense",
	    "عسكري", "حرب", "غارة", "صاروخ", "قوات", "قصف", "هجوم", "انفجار", "معركة", "جيش",
	]

	# Place-name substring -> (lat, lng) used by geo_est(). Insertion order
	# matters: geo_est() returns the first key found in the text.
	GEO = {
	    "syria": (35, 38),
	    "iraq": (33.3, 44.4),
	    "yemen": (15.4, 44.2),
	    "gaza": (31.4, 34.4),
	    "lebanon": (33.9, 35.5),
	    "ukraine": (48.4, 37.8),
	    "sudan": (15.6, 32.5),
	    "iran": (32.4, 53.7),
	    "israel": (31.8, 35.2),
	    "libya": (32.9, 13.1),
	    "red sea": (18, 40),
	    "russia": (55.8, 37.6),
	    "turkey": (39.9, 32.9),
	    "egypt": (30, 31.2),
	    "jordan": (31.9, 35.9),
	    "saudi": (24.7, 46.7),
	    "سوريا": (35, 38),
	    "عراق": (33.3, 44.4),
	    "يمن": (15.4, 44.2),
	    "غزة": (31.4, 34.4),
	    "لبنان": (33.9, 35.5),
	    "أوكرانيا": (48.4, 37.8),
	    "البحر الأحمر": (18, 40),
	    "مصر": (30, 31.2),
	}

	def is_mil(icao, cs):
	    """Return True when an aircraft looks military.

	    An aircraft is flagged when its callsign (stripped, upper-cased) starts
	    with any prefix in MIL_PFX, or when ``icao`` parsed as a hexadecimal
	    number falls inside one of the inclusive MIL_HEX ranges.

	    Args:
	        icao: Hex address string; ``None`` or a non-hex value simply fails
	            the range check.
	        cs: Callsign; ``None`` is treated as an empty string.
	    """
	    callsign = (cs or "").strip().upper()
	    # str.startswith accepts a tuple, so all prefixes are tested in one call.
	    if callsign.startswith(tuple(MIL_PFX)):
	        return True
	    try:
	        address = int(icao, 16)
	    except (TypeError, ValueError):
	        # Missing or malformed address: nothing to range-check. Only these
	        # two errors are expected, so anything else is allowed to surface.
	        return False
	    return any(lo <= address <= hi for lo, hi in MIL_HEX)

	def geo_est(t):
	    """Estimate coordinates for a text from the place names it mentions.

	    The text is lower-cased and scanned for the keys of GEO in insertion
	    order. Returns the ``(lat, lng)`` pair of the first key found, or
	    ``(None, None)`` when no key occurs in the text.
	    """
	    lowered = t.lower()
	    hit = next(
	        (coords for place, coords in GEO.items() if place in lowered),
	        None,
	    )
	    if hit is None:
	        return None, None
	    lat, lng = hit
	    return lat, lng

	def ev_type(t):
	    """Classify a headline into an event type by keyword.

	    The lower-cased text is checked against each rule in order and the label
	    of the first rule with a matching keyword is returned; "violence" is the
	    fallback when nothing matches.
	    """
	    rules = (
	        ("explosion", ("airstrike", "bombing", "strike", "غارة", "قصف", "explosion", "missile", "صاروخ")),
	        ("protest", ("protest", "احتجاج")),
	        ("battle", ("battle", "clash", "اشتباك", "معركة")),
	        ("strategic", ("troops", "deploy", "عسكري", "قوات")),
	    )
	    text = t.lower()
	    for label, keywords in rules:
	        for keyword in keywords:
	            if keyword in text:
	                return label
	    return "violence"

	# ═══ 1. OpenSky — Public API (NO AUTH NEEDED) ═══
	def _opensky_aircraft(s):
	    """Build an Aircraft from one OpenSky state row, or return None to skip it.

	    Rows that are empty, shorter than 8 items, or missing a position
	    (items 5 and 6) are skipped. Items 9, 10 and 13 are read only when the
	    row is long enough, and a row whose values cannot be converted to float
	    is skipped instead of raising.
	    """
	    if not s or len(s) < 8 or s[6] is None or s[5] is None:
	        return None

	    def item(i):
	        # The length guard above only covers indices 0-7.
	        return s[i] if len(s) > i else None

	    ic = s[0] or ""
	    cs = (s[1] or "").strip()
	    try:
	        return Aircraft(
	            icao24=ic,
	            callsign=cs or ic,
	            lat=float(s[6]),
	            lng=float(s[5]),
	            alt=float(s[7] or item(13) or 0),
	            velocity=float(item(9) or 0),
	            heading=float(item(10) or 0),
	            category="military" if is_mil(ic, cs) else "civil",
	        )
	    except (TypeError, ValueError):
	        return None


	def fetch_opensky() -> List[Aircraft]:
	    """Fetch aircraft states from the OpenSky public API (no authentication).

	    Two bounding boxes are queried in turn. Every usable state row becomes an
	    Aircraft whose category is decided by is_mil(). A failure in one zone is
	    logged and does not prevent the other zone from being fetched.

	    Returns:
	        The aircraft collected from all zones; possibly empty.
	    """
	    result = []
	    zones = [
	        (12, 42, 25, 63, "ME"),  # Middle East
	        (44, 56, 22, 45, "EU"),  # East Europe
	    ]
	    for lamin, lamax, lomin, lomax, name in zones:
	        try:
	            log.info(f"[OpenSky] Fetching {name}...")
	            r = requests.get(
	                "https://opensky-network.org/api/states/all",
	                params={"lamin": lamin, "lamax": lamax, "lomin": lomin, "lomax": lomax},
	                headers=UA, timeout=30,
	            )
	            log.info(f"[OpenSky] {name}: HTTP {r.status_code}")
	            if r.status_code == 200:
	                states = r.json().get("states") or []
	                for s in states:
	                    aircraft = _opensky_aircraft(s)
	                    if aircraft is not None:
	                        result.append(aircraft)
	                log.info(f"[OpenSky] {name}: {len(states)} aircraft")
	            else:
	                log.warning(f"[OpenSky] {name}: {r.status_code} - {r.text[:100]}")
	            time.sleep(5)  # Public API: 1 req per 5 sec
	        except Exception as e:
	            # Best-effort boundary: log and move on to the next zone.
	            log.error(f"[OpenSky] {name} ERROR: {e}")
	    log.info(f"[OpenSky] Total: {len(result)} ({sum(1 for a in result if a.category=='military')} mil)")
	    return result

	# ═══ 2. Ships — DigiTraffic Public AIS (always works) ═══
	def fetch_ships() -> List[Ship]:
	    """Fetch vessel positions from the Finnish DigiTraffic API (no auth).

	    At most the first 400 features of the response are converted. Features
	    with fewer than two coordinates are skipped. Any error is logged and the
	    vessels collected so far are returned.
	    """
	    vessels = []
	    try:
	        log.info("[Ships] Fetching DigiTraffic...")
	        resp = requests.get(
	            "https://meri.digitraffic.fi/api/vessel-location/v1/locations",
	            headers=UA,
	            timeout=20,
	        )
	        log.info(f"[Ships] HTTP {resp.status_code}")
	        if resp.status_code == 200:
	            for feature in resp.json().get("features", [])[:400]:
	                props = feature.get("properties", {})
	                coords = feature.get("geometry", {}).get("coordinates", [])
	                if len(coords) >= 2:
	                    # coords[1] is stored as lat and coords[0] as lng.
	                    vessels.append(
	                        Ship(
	                            mmsi=str(props.get("mmsi", "")),
	                            name="Vessel",
	                            lat=float(coords[1]),
	                            lng=float(coords[0]),
	                            course=float(props.get("cog", 0)),
	                            speed=float(props.get("sog", 0)),
	                            ship_type="cargo",
	                            source="DigiTraffic",
	                        )
	                    )
	            log.info(f"[Ships] {len(vessels)} vessels")
	    except Exception as exc:
	        log.error(f"[Ships] ERROR: {exc}")
	    return vessels

	# ═══ 3. GDELT — Conflict Events ═══
	def fetch_gdelt() -> List[Event]:
	    """Collect events from the GDELT V2 article-list and geo endpoints.

	    Two article queries (English and Arabic) are run, then one GeoJSON query.
	    Items are de-duplicated on the first 50 characters of their title, and
	    the same 50-character key is hashed to produce each event id. Articles
	    without coordinates are placed with geo_est(), falling back to (33, 44).
	    Failures are logged per request and never raised.

	    Returns:
	        All events gathered from both endpoints; possibly empty.
	    """
	    events = []
	    seen = set()  # 50-character title keys already emitted
	    queries = [
	        ("conflict OR military OR airstrike OR missile OR bombing", "eng"),
	        ("غارة OR صواريخ OR عسكري OR قصف", "ara"),
	    ]
	    for q, lang in queries:
	        try:
	            log.info(f"[GDELT] Fetching '{q[:30]}'...")
	            r = requests.get("https://api.gdeltproject.org/api/v2/doc/doc", params={
	                "query": q, "mode": "artlist", "maxrecords": "40",
	                "format": "json", "timespan": "24h", "sourcelang": lang,
	            }, headers=UA, timeout=25)
	            log.info(f"[GDELT] HTTP {r.status_code}, len={len(r.text)}")

	            txt = r.text.strip()
	            if r.status_code == 200 and txt:
	                if txt.startswith('{'):
	                    arts = json.loads(txt).get("articles", [])
	                    for a in arts:
	                        t = a.get("title", "")
	                        key = t[:50]
	                        if not t or key in seen:
	                            continue
	                        seen.add(key)
	                        la = a.get("latitude")
	                        lo = a.get("longitude")
	                        if la is None or lo is None:
	                            la, lo = geo_est(t)
	                        events.append(Event(
	                            id=hashlib.md5(key.encode()).hexdigest()[:10],
	                            title=t[:200], lat=float(la or 33), lng=float(lo or 44),
	                            event_type=ev_type(t), location=a.get("sourcecountry", ""),
	                            source_name=a.get("domain", "GDELT"),
	                            source_url=a.get("url", ""),
	                            date=a.get("seendate", ""),
	                        ))
	                    log.info(f"[GDELT] '{q[:20]}': {len(arts)} articles")
	                else:
	                    # GDELT sometimes returns non-JSON. This branch must still
	                    # fall through to the pause below rather than skip it.
	                    log.warning(f"[GDELT] Not JSON: {txt[:80]}")
	            time.sleep(3)
	        except Exception as e:
	            log.error(f"[GDELT] ERROR: {e}")

	    # GeoJSON endpoint
	    try:
	        log.info("[GDELT] Fetching geo...")
	        r = requests.get("https://api.gdeltproject.org/api/v2/geo/geo", params={
	            "query": "military OR conflict OR airstrike",
	            "format": "geojson", "timespan": "24h", "maxpoints": "80",
	        }, headers=UA, timeout=25)
	        if r.status_code == 200 and r.text.strip().startswith('{'):
	            for f in json.loads(r.text).get("features", []):
	                c = f.get("geometry", {}).get("coordinates", [])
	                if len(c) < 2:
	                    continue
	                p = f.get("properties", {})
	                nm = str(p.get("name", p.get("html", "Event")))[:200]
	                if BeautifulSoup:
	                    nm = BeautifulSoup(nm, "html.parser").get_text()
	                key = nm[:50]
	                if key in seen:
	                    continue
	                seen.add(key)
	                events.append(Event(
	                    # Hash the dedupe key itself so two events that pass the
	                    # 50-character dedupe cannot share an id.
	                    id=hashlib.md5(key.encode()).hexdigest()[:10],
	                    title=nm, lat=float(c[1]), lng=float(c[0]), event_type=ev_type(nm),
	                    location="", source_name="GDELT", source_url="",
	                    date=datetime.now().strftime("%Y-%m-%d"),
	                ))
	            log.info("[GDELT] Geo: added points")
	    except Exception as e:
	        log.error(f"[GDELT] Geo ERROR: {e}")

	    log.info(f"[GDELT] Total: {len(events)} events")
	    return events

	# ═══ 4. RSS News ═══
	# Feed display name -> feed URL; fetch_rss() iterates this mapping in order.
	RSS = {
	    # Arabic-language feeds
	    "الجزيرة": "https://www.aljazeera.net/aljazeerarss/a7c186be-1baa-4bd4-9d80-a84db769f779/73d0e1b4-532f-45ef-b135-bfdff8b8cab9",
	    "الجزيرة عاجل": "https://www.aljazeera.net/aljazeerarss/a7c186be-1baa-4bd4-9d80-a84db769f779/ce3b4f1b-3bc9-4c5f-9894-c1a07201fa7d",
	    "BBC عربي": "https://feeds.bbci.co.uk/arabic/rss.xml",
	    "سكاي نيوز": "https://www.skynewsarabia.com/web/rss",
	    "العربية": "https://www.alarabiya.net/.mrss/ar.xml",
	    "RT عربي": "https://arabic.rt.com/rss/",
	    "France24": "https://www.france24.com/ar/rss",
	    # English-language feeds
	    "Reuters": "https://feeds.reuters.com/Reuters/worldNews",
	    "BBC World": "https://feeds.bbci.co.uk/news/world/rss.xml",
	    "BBC ME": "https://feeds.bbci.co.uk/news/world/middle_east/rss.xml",
	    "AJ EN": "https://www.aljazeera.com/xml/rss/all.xml",
	    "BBC Europe": "https://feeds.bbci.co.uk/news/world/europe/rss.xml",
	    "Defense News": "https://www.defensenews.com/arc/outboundfeeds/rss/",
	}

	def fetch_rss() -> List[News]:
	    """Collect recent headlines from every feed listed in RSS.

	    Each feed is downloaded with requests (shared User-Agent, 15 s timeout)
	    and the response body is handed to feedparser, so a stalled server
	    cannot block the caller indefinitely. Up to 8 entries per feed are kept.
	    HTML is stripped from the summary, which is cut to 200 characters, and
	    an item is flagged military when any MIL_KW keyword occurs in its
	    lower-cased title plus summary. A failing feed is logged and skipped.

	    Returns:
	        News items with military-flagged ones first (stable order otherwise),
	        or an empty list when feedparser is not installed.
	    """
	    if not feedparser:
	        log.error("[RSS] feedparser not installed!")
	        return []
	    items = []
	    for name, url in RSS.items():
	        try:
	            resp = requests.get(url, headers=UA, timeout=15)
	            resp.raise_for_status()
	            feed = feedparser.parse(resp.content)
	            for entry in feed.entries[:8]:
	                title = entry.get("title", "")
	                raw = entry.get("summary", entry.get("description", ""))
	                if not title:
	                    continue
	                combined = f"{title} {raw}".lower()
	                if raw and BeautifulSoup:
	                    summary = BeautifulSoup(raw, "html.parser").get_text()[:200]
	                elif raw:
	                    # Fallback tag stripper when bs4 is unavailable.
	                    summary = re.sub(r'<[^>]+>', '', raw)[:200]
	                else:
	                    summary = ""
	                items.append(News(
	                    title=title[:300],
	                    link=entry.get("link", ""),
	                    source=name,
	                    published=entry.get("published", entry.get("updated", "")),
	                    summary=summary,
	                    is_military=any(k in combined for k in MIL_KW),
	                ))
	        except Exception as exc:
	            # Best-effort per feed: one broken source must not stop the rest.
	            log.warning(f"[RSS] {name}: {exc}")
	    # False sorts before True, so military items come first.
	    items.sort(key=lambda x: not x.is_military)
	    log.info(f"[RSS] {len(items)} news ({sum(1 for i in items if i.is_military)} military)")
	    return items

	# ═══ 5. Cameras ═══
	# Country code -> (display name, default lat, default lng) used by
	# fetch_cameras() to build the request URL and to place cameras on the map.
	COUNTRIES = {
	    "IL": ("Israel", 31.8, 35.2),
	    "UA": ("Ukraine", 48.4, 37.8),
	    "TR": ("Turkey", 39.9, 32.9),
	    "RU": ("Russia", 55.8, 37.6),
	    "IQ": ("Iraq", 33.3, 44.4),
	    "EG": ("Egypt", 30, 31.2),
	}

	def fetch_cameras() -> List[Camera]:
	    """Scrape up to five camera thumbnail URLs per country from insecam.org.

	    For each entry in COUNTRIES the by-country listing page is requested and
	    the ``src`` of the first five ``img.thumbnail-item__img`` elements is
	    kept. A camera's position is the country's default coordinate shifted by
	    0.03 degrees per index, not a location taken from the page. A failing
	    country is logged and skipped.

	    Returns:
	        The cameras found; empty when BeautifulSoup is not installed.
	    """
	    cams = []
	    if not BeautifulSoup:
	        # Without an HTML parser nothing can be extracted, so skip the
	        # network round-trips entirely.
	        log.error("[Cam] BeautifulSoup not installed!")
	        return cams
	    for code, (name, dlat, dlng) in COUNTRIES.items():
	        try:
	            r = requests.get(
	                f"http://www.insecam.org/en/bycountry/{code}/",
	                headers={**UA, "Referer": "http://www.insecam.org/"},
	                timeout=15,
	            )
	            if r.status_code != 200:
	                continue
	            thumbs = BeautifulSoup(r.text, "html.parser").select("img.thumbnail-item__img")[:5]
	            for i, img in enumerate(thumbs):
	                src = img.get("src", "")
	                if src:
	                    cams.append(Camera(
	                        id=f"c_{code}_{i}",
	                        lat=dlat + (i * 0.03),
	                        lng=dlng + (i * 0.03),
	                        country=name,
	                        url=src,
	                    ))
	        except Exception as e:
	            # Best-effort per country, but leave a trace instead of hiding it.
	            log.warning(f"[Cam] {name}: {e}")
	    log.info(f"[Cam] {len(cams)} cameras")
	    return cams

	# ═══ Central Manager ═══
	class DataManager:
	    """Central store that refreshes every data source and serialises the result.

	    Collections are replaced under ``self.lock``. ``status`` maps a source
	    name to ``{"s": "online" | "error", "c": <count>}``. ``logs`` keeps the
	    last 100 timestamped messages.
	    """

	    def __init__(self):
	        self.aircraft = []
	        self.ships = []
	        self.events = []
	        self.news = []
	        self.cameras = []
	        self.status = {}
	        self.last_update = 0
	        self.is_fetching = False
	        self.lock = threading.Lock()
	        self.logs = deque(maxlen=100)

	    def _log(self, msg):
	        """Record ``msg`` in the in-memory log with a clock prefix and log it."""
	        self.logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")
	        log.info(msg)

	    def _merge_events(self, fresh):
	        """Return ``fresh`` plus stored events that are still worth keeping.

	        Stored events are carried over only when they expose a ``timestamp``
	        attribute (checked on the first stored event), are less than 24 hours
	        old, and their id is not in ``fresh``. With nothing stored, or nothing
	        to carry over, ``fresh`` is returned unchanged.
	        """
	        previous = self.events
	        # Test for emptiness before indexing: previous[0] on an empty list
	        # raises IndexError.
	        if not previous or not hasattr(previous[0], "timestamp"):
	            return fresh
	        fresh_ids = {e.id for e in fresh}
	        now = time.time()
	        carried = [
	            c for c in previous
	            if now - c.timestamp < 86400 and c.id not in fresh_ids
	        ]
	        return fresh + carried if carried else fresh

	    def _refresh_opensky(self):
	        """Refresh ``aircraft`` and the "OpenSky" status entry."""
	        try:
	            ac = fetch_opensky()
	            with self.lock:
	                self.aircraft = ac
	            mil = sum(1 for a in ac if a.category == "military")
	            self.status["OpenSky"] = {"s": "online" if ac else "error", "c": len(ac)}
	            self._log(f"✈️ OpenSky: {len(ac)} طائرة ({mil} عسكري)")
	        except Exception as e:
	            self.status["OpenSky"] = {"s": "error", "c": 0}
	            self._log(f"❌ OpenSky: {e}")

	    def _refresh_ships(self):
	        """Refresh ``ships`` and the "Ships" status entry."""
	        try:
	            sh = fetch_ships()
	            with self.lock:
	                self.ships = sh
	            self.status["Ships"] = {"s": "online" if sh else "error", "c": len(sh)}
	            self._log(f"🚢 Ships: {len(sh)} سفينة")
	        except Exception as e:
	            self.status["Ships"] = {"s": "error", "c": 0}
	            self._log(f"❌ Ships: {e}")

	    def _refresh_gdelt(self):
	        """Refresh ``events`` (merged with recent ones) and the "GDELT" status."""
	        try:
	            ev = fetch_gdelt()
	            with self.lock:
	                self.events = self._merge_events(ev)
	                total = len(self.events)
	            self.status["GDELT"] = {"s": "online" if ev else "error", "c": total}
	            self._log(f"⚔️ GDELT: {len(ev)} حدث نزاع")
	        except Exception as e:
	            self.status["GDELT"] = {"s": "error", "c": 0}
	            self._log(f"❌ GDELT: {e}")

	    def _refresh_rss(self):
	        """Refresh ``news`` and the "RSS" status entry."""
	        try:
	            nw = fetch_rss()
	            with self.lock:
	                self.news = nw
	            self.status["RSS"] = {"s": "online" if nw else "error", "c": len(nw)}
	            self._log(f"📰 RSS: {len(nw)} خبر")
	        except Exception as e:
	            self.status["RSS"] = {"s": "error", "c": 0}
	            self._log(f"❌ RSS: {e}")

	    def _refresh_cameras(self):
	        """Refresh ``cameras`` (kept as-is on an empty result) and its status."""
	        try:
	            cm = fetch_cameras()
	            with self.lock:
	                # An empty scrape keeps the previously known cameras.
	                if cm:
	                    self.cameras = cm
	            self.status["Cameras"] = {"s": "online" if cm else "error", "c": len(self.cameras)}
	            self._log(f"📷 Cameras: {len(cm)} كاميرا")
	        except Exception as e:
	            self.status["Cameras"] = {"s": "error", "c": 0}
	            self._log(f"❌ Cameras: {e}")

	    def fetch_all(self):
	        """Refresh every source in sequence; a no-op while a refresh is running.

	        Each source is isolated, so one failure marks only that source as
	        "error". ``is_fetching`` is always cleared on exit, even when
	        something unexpected escapes, so later refreshes are never blocked.
	        """
	        if self.is_fetching:
	            return
	        self.is_fetching = True
	        t0 = time.time()
	        try:
	            self._log("⏳ بدء التحديث من جميع المصادر...")
	            self._refresh_opensky()
	            self._refresh_ships()
	            self._refresh_gdelt()
	            self._refresh_rss()
	            self._refresh_cameras()
	            self.last_update = time.time()
	        finally:
	            self.is_fetching = False
	        self._log(f"✅ اكتمل في {time.time()-t0:.1f} ثانية")

	    def to_json(self):
	        """Serialise the current snapshot to a JSON string (non-ASCII kept).

	        Output is capped at 200 military and 500 civil aircraft, 400 ships,
	        200 events, 100 news items and 50 cameras; ``aircraft.total`` reports
	        the uncapped aircraft count.
	        """
	        with self.lock:
	            mil = [a.to_dict() for a in self.aircraft if a.category == "military"]
	            civ = [a.to_dict() for a in self.aircraft if a.category != "military"]
	            return json.dumps({
	                "aircraft": {"military": mil[:200], "civil": civ[:500], "total": len(self.aircraft)},
	                "ships": [s.to_dict() for s in self.ships[:400]],
	                "conflicts": [e.to_dict() for e in self.events[:200]],
	                "news": [n.to_dict() for n in self.news[:100]],
	                "cameras": [c.to_dict() for c in self.cameras[:50]],
	                "meta": {"update_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")},
	            }, ensure_ascii=False)