# amal / media_analyzer.py
# osamabyc's picture
# Upload 75 files
# c15c04b verified
import os
import tempfile
import requests
from PIL import Image
from io import BytesIO
import PyPDF2
from urllib.parse import urlparse
import speech_recognition as sr
# moviepy is an optional dependency: record whether the import succeeded so
# that code using `mp` can check the flag first instead of failing at import.
try:
    import moviepy.editor as mp
except ImportError:
    MOVIEPY_AVAILABLE = False
else:
    MOVIEPY_AVAILABLE = True
def analyze_url_type(url: str) -> str:
    """Classify a URL by the site (or document type) it points to.

    Args:
        url: URL to classify.

    Returns:
        One of "YouTube", "GitHub", "تغريدة" (tweet), "ملف PDF" (PDF file)
        or "موقع ويب عام" (generic website).
    """
    parsed = urlparse(url)
    # `hostname` excludes credentials and port and is lower-cased; it is None
    # when the URL carries no network location.
    host = parsed.hostname or ""

    def hosted_on(*sites: str) -> bool:
        # Accept the site itself or one of its subdomains. A plain substring
        # test would wrongly treat "dropbox.com" as "x.com".
        return any(host == site or host.endswith("." + site) for site in sites)

    if hosted_on("youtube.com", "youtu.be"):
        return "YouTube"
    if hosted_on("github.com"):
        return "GitHub"
    if hosted_on("twitter.com", "x.com"):
        return "تغريدة"
    # The file extension lives in the URL path, not in the host name. The host
    # check is kept only so inputs that matched before still match.
    if parsed.path.lower().endswith(".pdf") or host.endswith(".pdf"):
        return "ملف PDF"
    return "موقع ويب عام"
def fix_url(url: str) -> str:
    """Return *url* with an ``https://`` scheme added when it has none.

    Surrounding whitespace is removed. A URL that already starts with
    ``http://`` or ``https://`` (in any letter case) is returned as is;
    otherwise leading slashes (e.g. a protocol-relative ``//host``) are
    dropped and ``https://`` is prepended.

    Args:
        url: URL text, possibly missing its scheme.

    Returns:
        The URL with an explicit HTTP(S) scheme.
    """
    cleaned = url.strip()
    # Schemes are case-insensitive, so "HTTP://..." must not be prefixed again.
    if cleaned.lower().startswith(("http://", "https://")):
        return cleaned
    return "https://" + cleaned.lstrip("/")
def detect_media_type(url: str) -> str:
    """Guess the media kind of a URL from its file extension.

    The query string and fragment are ignored, so
    ``https://host/a.jpg?size=large`` is still recognised as an image.

    Args:
        url: URL or file name to inspect.

    Returns:
        ``'image'``, ``'video'``, ``'audio'``, ``'pdf'``, or ``'link'`` when
        no known extension matches.
    """
    extensions_by_kind = (
        ('image', ('.jpg', '.jpeg', '.png', '.gif', '.webp')),
        ('video', ('.mp4', '.mov', '.avi', '.webm')),
        ('audio', ('.mp3', '.wav', '.ogg', '.m4a')),
        ('pdf', ('.pdf',)),
    )
    # Drop the fragment first, then the query, so only the resource part of
    # the URL is compared against the extensions.
    resource = url.partition('#')[0].partition('?')[0].lower()
    for kind, extensions in extensions_by_kind:
        if resource.endswith(extensions):
            return kind
    return 'link'
def analyze_image_from_url(image_url: str, timeout: float = 30.0) -> str:
    """Download an image and describe its size and format.

    Args:
        image_url: URL of the image to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        An Arabic summary line containing the image size and format.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    response = requests.get(image_url, timeout=timeout)
    response.raise_for_status()
    # The context manager releases the decoder resources once the summary
    # string has been built.
    with Image.open(BytesIO(response.content)) as image:
        return f"تحليل الصورة: الحجم {image.size}، الصيغة {image.format}"
def analyze_pdf_from_url(pdf_url: str, timeout: float = 30.0) -> str:
    """Download a PDF and return a preview of its extracted text.

    Args:
        pdf_url: URL of the PDF document.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        An Arabic header line followed by the first 500 characters of the
        extracted text and a trailing ``...``.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    preview_chars = 500

    response = requests.get(pdf_url, timeout=timeout)
    response.raise_for_status()

    # The payload is already in memory, so it is parsed from a BytesIO stream
    # rather than being written to a temporary file and read back.
    reader = PyPDF2.PdfReader(BytesIO(response.content))

    # Only the first `preview_chars` characters are shown, so stop extracting
    # pages as soon as enough text has been collected.
    pieces = []
    collected = 0
    for page in reader.pages:
        piece = page.extract_text() or ""
        pieces.append(piece)
        collected += len(piece)
        if collected >= preview_chars:
            break
    text = "".join(pieces)
    return f"تم استخراج النص التالي من PDF:\n{text[:preview_chars]}..."
def extract_text_from_audio_file(audio_path: str) -> str:
    """Transcribe a local audio file with the Google recognizer.

    The whole file is read into memory and sent for recognition with the
    language set to ``ar-SA``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The recognised text, or an Arabic message when the speech could not
        be understood or the recognition service could not be reached.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_path) as clip:
        recording = engine.record(clip)

    try:
        transcript = engine.recognize_google(recording, language="ar-SA")
    except sr.UnknownValueError:
        # The recognizer could not make sense of the recording.
        return "لم أتمكن من التعرف على الصوت"
    except sr.RequestError:
        # The request to the recognition service failed.
        return "خطأ في الاتصال بخدمة التعرف على الصوت"
    return transcript
def analyze_audio_from_url(audio_url: str, timeout: float = 30.0) -> str:
    """Download an audio file and return its transcription.

    Args:
        audio_url: URL of the audio file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        An Arabic header line followed by the text produced by
        ``extract_text_from_audio_file``.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    response = requests.get(audio_url, timeout=timeout)
    response.raise_for_status()

    # NOTE(review): the payload is always stored with a ".wav" suffix whatever
    # its real format is -- confirm the recognizer can read non-WAV downloads
    # (e.g. .mp3/.ogg/.m4a URLs) or convert them first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        temp_audio.write(response.content)
        temp_path = temp_audio.name

    try:
        text = extract_text_from_audio_file(temp_path)
        return f"نص الصوت:\n{text}"
    finally:
        # delete=False above means the file must be removed explicitly.
        os.remove(temp_path)
def analyze_video_from_url(video_url: str, timeout: float = 30.0) -> str:
    """Download a video, extract its audio track and transcribe it.

    Args:
        video_url: URL of the video file.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely.

    Returns:
        An Arabic header line followed by the transcription, or an Arabic
        message when moviepy is not installed or the video has no audio track.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
    """
    if not MOVIEPY_AVAILABLE:
        return "مكتبة moviepy غير متوفرة لتحليل الفيديو"

    response = requests.get(video_url, timeout=timeout)
    response.raise_for_status()

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
        temp_video.write(response.content)
        video_path = temp_video.name
    # Swap only the extension; str.replace would also rewrite any other
    # ".mp4" occurring earlier in the temporary path.
    audio_path = os.path.splitext(video_path)[0] + ".wav"

    try:
        video = mp.VideoFileClip(video_path)
        try:
            if video.audio is None:
                # A clip without an audio track has nothing to transcribe.
                return "الفيديو لا يحتوي على صوت"
            video.audio.write_audiofile(audio_path, verbose=False, logger=None)
        finally:
            # Release the reader before the temporary files are deleted; an
            # open clip keeps the video file locked on some platforms.
            video.close()
        text = extract_text_from_audio_file(audio_path)
        return f"نص الفيديو:\n{text}"
    finally:
        for leftover in (video_path, audio_path):
            if os.path.exists(leftover):
                os.remove(leftover)