Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,41 +25,67 @@ from io import BytesIO
|
|
| 25 |
class ImageScraper:
|
| 26 |
def __init__(self):
|
| 27 |
self.PIXABAY_API_KEY = "48069976-37e20099248207cee12385560" # Replace with your key
|
| 28 |
-
self.
|
| 29 |
-
|
| 30 |
-
|
| 31 |
|
| 32 |
-
def
|
| 33 |
try:
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
"cyber security",
|
| 38 |
-
"data protection",
|
| 39 |
-
"digital technology"
|
| 40 |
-
]
|
| 41 |
-
|
| 42 |
-
all_urls = []
|
| 43 |
-
for term in search_terms:
|
| 44 |
-
url = f"https://pixabay.com/api/?key={self.PIXABAY_API_KEY}&q={term.replace(' ', '+')}&image_type=photo&per_page=5"
|
| 45 |
-
response = requests.get(url)
|
| 46 |
data = response.json()
|
| 47 |
-
|
| 48 |
if 'hits' in data:
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
return list(set(all_urls))[:num_images] if all_urls else self.stock_images
|
| 52 |
-
|
| 53 |
except Exception as e:
|
| 54 |
print(f"Pixabay API error: {e}")
|
| 55 |
-
return
|
| 56 |
|
| 57 |
def get_stock_images(self) -> List[str]:
|
| 58 |
-
"""Return preset stock images
|
| 59 |
return [
|
| 60 |
"https://images.pexels.com/photos/60504/security-protection-anti-virus-software-60504.jpeg",
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
def scrape_pexels(self, query: str) -> List[str]:
|
| 65 |
urls = []
|
|
|
|
| 25 |
class ImageScraper:
|
| 26 |
def __init__(self):
|
| 27 |
self.PIXABAY_API_KEY = "48069976-37e20099248207cee12385560" # Replace with your key
|
| 28 |
+
self.headers = {
|
| 29 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
| 30 |
+
}
|
| 31 |
|
| 32 |
+
def get_pixabay_images(self, query: str) -> List[str]:
    """Return the ``largeImageURL`` of up to 20 Pixabay photo hits for *query*.

    This is a best-effort lookup: it never raises. A non-200 response yields
    an empty list; any other failure (network error, timeout, malformed
    payload) is printed and also yields an empty list.

    Args:
        query: Free-text search term; it is URL-encoded by ``requests``.

    Returns:
        The large-image URLs of the hits, in API order. Hits that carry no
        ``largeImageURL`` are skipped.
    """
    try:
        response = requests.get(
            "https://pixabay.com/api/",
            # Passing the query through ``params`` lets requests escape it,
            # so characters such as ``&`` or ``#`` cannot corrupt the URL.
            params={
                "key": self.PIXABAY_API_KEY,
                "q": query,
                "image_type": "photo",
                "per_page": 20,
            },
            # Without a timeout a stalled connection would block forever.
            timeout=10,
        )
        if response.status_code != 200:
            return []

        data = response.json()
        hits = data.get('hits', []) if isinstance(data, dict) else []
        return [img['largeImageURL'] for img in hits if img.get('largeImageURL')]
    except Exception as e:
        # Deliberately broad: callers rely on "empty list" as the failure
        # signal and fall back to other image sources.
        print(f"Pixabay API error: {e}")
        return []
|
| 44 |
|
| 45 |
def get_stock_images(self) -> List[str]:
    """Provide the built-in Pexels photo URLs used as the fallback image set."""
    fallback_urls = (
        "https://images.pexels.com/photos/60504/security-protection-anti-virus-software-60504.jpeg",
        "https://images.pexels.com/photos/5380642/pexels-photo-5380642.jpeg",
        "https://images.pexels.com/photos/2582937/pexels-photo-2582937.jpeg",
        "https://images.pexels.com/photos/7319074/pexels-photo-7319074.jpeg",
        "https://images.pexels.com/photos/4164418/pexels-photo-4164418.jpeg",
        "https://images.pexels.com/photos/3861969/pexels-photo-3861969.jpeg",
        "https://images.pexels.com/photos/5473298/pexels-photo-5473298.jpeg",
        "https://images.pexels.com/photos/4348401/pexels-photo-4348401.jpeg",
        "https://images.pexels.com/photos/8386440/pexels-photo-8386440.jpeg",
        "https://images.pexels.com/photos/5473950/pexels-photo-5473950.jpeg",
    )
    # Hand back a new list on every call so callers can mutate it freely.
    return list(fallback_urls)
|
| 59 |
+
|
| 60 |
+
def get_images(self, query: str, num_images: int = 15) -> List[str]:
    """Collect up to *num_images* unique image URLs for *query*.

    Sources are tried in order:

    1. Pixabay results for *query* itself.
    2. If that finds nothing, Pixabay results for a fixed list of
       technology-related terms, stopping once enough unique URLs have
       been gathered so no needless API requests are made.
    3. If there is still nothing, the preset stock images.

    Args:
        query: Search term forwarded to ``get_pixabay_images``.
        num_images: Maximum number of URLs to return. Zero or a negative
            value returns an empty list without any lookup.

    Returns:
        De-duplicated URLs in first-seen order, at most *num_images* long.
    """
    if num_images <= 0:
        # A negative slice bound would silently drop items from the end
        # instead of limiting the result, so treat it as "no images".
        return []

    # Dict keys give order-preserving de-duplication.
    unique = dict.fromkeys(self.get_pixabay_images(query))

    # If no Pixabay images, try with technology-related terms
    if not unique:
        tech_terms = ["digital security", "technology", "cyber security", "data protection"]
        for term in tech_terms:
            unique.update(dict.fromkeys(self.get_pixabay_images(term)))
            if len(unique) >= num_images:
                # Later terms could only add URLs that the final slice
                # would discard anyway.
                break

    # If still no images, use stock images
    if not unique:
        unique = dict.fromkeys(self.get_stock_images())

    return list(unique)[:num_images]
|
| 76 |
+
|
| 77 |
+
def generate_fallback_audio(self, script: str) -> AudioFileClip:
    """Generate fallback narration with gTTS, or silence if that fails.

    The script is synthesised to ``voice.mp3`` and loaded as an audio clip.
    The file is written to ``self.temp_dir`` when the instance has one,
    otherwise to the system temporary directory. If synthesis or loading
    fails for any reason, the error is printed and a silent clip of
    approximately the spoken length is returned instead.

    Args:
        script: Text to narrate.

    Returns:
        The narrated clip on success. On failure, a silent stereo
        ``AudioClip`` lasting 0.3 s per word of *script*, with a minimum of
        0.3 s so an empty script still gives a usable clip.
    """
    # Function-scope imports so this method does not depend on module-level
    # imports being present in the file.
    import tempfile
    from pathlib import Path

    try:
        # Not every instance defines ``temp_dir``; fall back to the system
        # temporary directory rather than failing on the attribute lookup.
        temp_dir = getattr(self, "temp_dir", None) or tempfile.gettempdir()
        audio_path = Path(temp_dir) / "voice.mp3"
        tts = gTTS(text=script, lang='en', slow=False)
        tts.save(str(audio_path))
        return AudioFileClip(str(audio_path))
    except Exception as e:
        print(f"Fallback audio generation failed: {e}")

        import numpy as np
        try:
            from moviepy.editor import AudioClip  # moviepy 1.x layout
        except ImportError:
            from moviepy import AudioClip  # moviepy 2.x layout

        # Approximate duration based on word count; never zero-length.
        duration = max(len(script.split()), 1) * 0.3

        def silence(t):
            # moviepy samples with either one time value or an array of
            # times; return a matching block of stereo zeros.
            return np.zeros((len(t), 2)) if np.ndim(t) else np.zeros(2)

        # AudioFileClip cannot be built without a file, so silence is
        # synthesised with the generic AudioClip instead.
        return AudioClip(silence, duration=duration, fps=44100)
|
| 89 |
|
| 90 |
def scrape_pexels(self, query: str) -> List[str]:
|
| 91 |
urls = []
|