Spaces:

tensor-boy
/

ISE

Runtime error

App Files Files Community

fikird committed on Nov 29, 2024

Commit

9dd0b76

1 Parent(s): 12d42ff

Simplify dependencies and update OSINT engine for better compatibility

Browse files

Files changed (2) hide show

osint_engine.py +57 -157
requirements.txt +5 -14

osint_engine.py CHANGED Viewed

@@ -5,9 +5,7 @@ import time
 import asyncio
 import aiohttp
 import requests
-import instaloader
-import face_recognition
-import numpy as np
 from PIL import Image
 from io import BytesIO
 from typing import Dict, List, Any, Union
@@ -15,14 +13,11 @@ from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service
 from webdriver_manager.chrome import ChromeDriverManager
-from holehe.core import *
-from sherlock import sherlock
 from geopy.geocoders import Nominatim
 from waybackpy import WaybackMachineCDXServerAPI
-import phonenumbers
-from phonenumbers import geocoder, carrier, timezone
 import whois
 from datetime import datetime
 class OSINTEngine:
     """OSINT capabilities for advanced information gathering"""
@@ -36,120 +31,89 @@ class OSINTEngine:
     def setup_apis(self):
         """Initialize API clients"""
-        self.instagram = instaloader.Instaloader()
         self.geolocator = Nominatim(user_agent="intelligent_search")
     async def search_username(self, username: str) -> Dict[str, Any]:
         """Search for username across multiple platforms"""
-        results = {}
-        # Sherlock search
-        sherlock_results = await self.sherlock_search(username)
-        results['platforms'] = sherlock_results
-        # Email search
-        email_results = await self.search_email(f"{username}@gmail.com")
-        results['email'] = email_results
-        return results
-    async def sherlock_search(self, username: str) -> List[Dict[str, str]]:
-        """Search username using Sherlock"""
-        results = []
-        sites = sherlock.site_data()
         async with aiohttp.ClientSession() as session:
             tasks = []
-            for site_name, site_data in sites.items():
-                task = self.check_username(session, username, site_name, site_data)
                 tasks.append(task)
-            results = await asyncio.gather(*tasks)
-            return [r for r in results if r is not None]
-    async def check_username(self, session, username: str, site_name: str, site_data: Dict) -> Dict[str, str]:
-        """Check username on a specific platform"""
-        url = site_data.get('url', '').format(username=username)
-        if not url:
-            return None
         try:
             async with session.get(url) as response:
                 if response.status == 200:
                     return {
-                        'platform': site_name,
                         'url': url,
-                        'found': True
                     }
         except:
             pass
         return None
-    async def search_email(self, email: str) -> Dict[str, Any]:
-        """Search for email presence on various platforms"""
-        results = {}
-        modules = get_functions()
-        for module in modules:
-            try:
-                out = await module(email)
-                if out:
-                    results[module.__name__] = out
-            except:
-                continue
-        return results
     async def search_image(self, image_url: str) -> Dict[str, Any]:
-        """Reverse image search and face recognition"""
-        results = {}
         try:
-            # Download image
             response = requests.get(image_url)
             img = Image.open(BytesIO(response.content))
-            # Convert to face_recognition format
-            img_array = np.array(img)
-            face_locations = face_recognition.face_locations(img_array)
-            face_encodings = face_recognition.face_encodings(img_array, face_locations)
-            results['faces_found'] = len(face_locations)
-            results['face_locations'] = face_locations
-            # Perform reverse image search
-            results['reverse_search'] = await self.reverse_image_search(image_url)
-        except Exception as e:
-            results['error'] = str(e)
-        return results
-    async def reverse_image_search(self, image_url: str) -> List[Dict[str, str]]:
-        """Perform reverse image search"""
-        results = []
-        try:
-            driver = webdriver.Chrome(
-                service=Service(ChromeDriverManager().install()),
-                options=self.chrome_options
-            )
-            # Google Images
             search_url = f"https://lens.google.com/uploadbyurl?url={image_url}"
-            driver.get(search_url)
-            time.sleep(3)
-            # Extract results (simplified)
-            results.append({
                 'source': 'Google Lens',
-                'url': driver.current_url
             })
-            driver.quit()
         except Exception as e:
-            results.append({'error': str(e)})
         return results
@@ -157,9 +121,6 @@ class OSINTEngine:
         """Gather personal information from various sources"""
         results = {}
-        if 'phone' in data:
-            results['phone'] = self.analyze_phone_number(data['phone'])
         if 'location' in data:
             results['location'] = await self.analyze_location(data['location'])
@@ -168,20 +129,6 @@ class OSINTEngine:
         return results
-    def analyze_phone_number(self, phone: str) -> Dict[str, Any]:
-        """Analyze phone number information"""
-        try:
-            number = phonenumbers.parse(phone)
-            return {
-                'valid': phonenumbers.is_valid_number(number),
-                'type': phonenumbers.number_type(number),
-                'country': geocoder.description_for_number(number, "en"),
-                'carrier': carrier.name_for_number(number, "en"),
-                'timezone': timezone.time_zones_for_number(number)
-            }
-        except Exception as e:
-            return {'error': str(e)}
     async def analyze_location(self, location: str) -> Dict[str, Any]:
         """Analyze location information"""
         try:
@@ -211,47 +158,6 @@ class OSINTEngine:
         except Exception as e:
             return {'error': str(e)}
-    async def search_social_media(self, username: str, platform: str = None) -> Dict[str, Any]:
-        """Search for user information on social media platforms"""
-        results = {}
-        if platform:
-            platforms = [platform]
-        else:
-            platforms = ['instagram', 'twitter', 'reddit']
-        for platform in platforms:
-            try:
-                if platform == 'instagram':
-                    results['instagram'] = await self.search_instagram(username)
-                elif platform == 'twitter':
-                    results['twitter'] = await self.search_twitter(username)
-                elif platform == 'reddit':
-                    results['reddit'] = await self.search_reddit(username)
-            except Exception as e:
-                results[platform] = {'error': str(e)}
-        return results
-    async def search_instagram(self, username: str) -> Dict[str, Any]:
-        """Search Instagram for user information"""
-        try:
-            profile = instaloader.Profile.from_username(self.instagram.context, username)
-            return {
-                'username': profile.username,
-                'full_name': profile.full_name,
-                'biography': profile.biography,
-                'followers': profile.followers,
-                'following': profile.followees,
-                'is_private': profile.is_private,
-                'is_verified': profile.is_verified,
-                'external_url': profile.external_url,
-                'posts_count': profile.mediacount,
-                'profile_pic_url': profile.profile_pic_url
-            }
-        except Exception as e:
-            return {'error': str(e)}
     async def search_historical_data(self, url: str) -> List[Dict[str, Any]]:
         """Search for historical data using Wayback Machine"""
         results = []
@@ -273,16 +179,6 @@ class OSINTEngine:
         return results
-    def solve_captcha(self, image_url: str) -> str:
-        """Solve CAPTCHA using OCR (simplified version)"""
-        try:
-            response = requests.get(image_url)
-            img = Image.open(BytesIO(response.content))
-            # Add your CAPTCHA solving logic here
-            return "CAPTCHA solution placeholder"
-        except Exception as e:
-            return f"Error: {str(e)}"
 # Helper function to create document from gathered information
 def create_report(data: Dict[str, Any], template: str = "default") -> str:
     """Create a formatted report from gathered information"""
@@ -297,7 +193,11 @@ def create_report(data: Dict[str, Any], template: str = "default") -> str:
                     report += f"* {key}: {value}\n"
             elif isinstance(content, list):
                 for item in content:
-                    report += f"* {item}\n"
             else:
                 report += f"{content}\n"
             report += "\n"

 import asyncio
 import aiohttp
 import requests
+import httpx
 from PIL import Image
 from io import BytesIO
 from typing import Dict, List, Any, Union
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service
 from webdriver_manager.chrome import ChromeDriverManager
 from geopy.geocoders import Nominatim
 from waybackpy import WaybackMachineCDXServerAPI
 import whois
 from datetime import datetime
+from googlesearch import search as google_search
 class OSINTEngine:
     """OSINT capabilities for advanced information gathering"""
     def setup_apis(self):
         """Create the client objects the engine keeps on the instance.

         Sets ``self.geolocator`` to a ``Nominatim`` geocoder and
         ``self.http_client`` to an ``httpx.AsyncClient``.
         """
         geocoding_client = Nominatim(user_agent="intelligent_search")
         async_http_client = httpx.AsyncClient()
         self.geolocator = geocoding_client
         self.http_client = async_http_client
     async def search_username(self, username: str) -> Dict[str, Any]:
         """Search for a username on well-known platforms and via Google.

         Args:
             username: Handle to look up. It is percent-encoded before being
                 placed in profile URLs, but used verbatim in the Google query.

         Returns:
             A dict with three keys:

             * ``'platforms'``: the non-``None`` results of ``check_profile``
               for each probed profile URL.
             * ``'social_media'``: an empty dict (never filled in by this
               method).
             * ``'websites'``: the results of
               ``google_search(query, num_results=5)`` as a list, or a
               single-element list holding the error message if that search
               raised.
         """
         # Function-scope import so the block does not depend on a file-level
         # import that is not present.
         from urllib.parse import quote

         results = {
             'platforms': [],
             'social_media': {},
             'websites': []
         }

         # Escape the handle so characters such as "/", "?" or "#" cannot turn
         # the profile URL into a different page on the same site.
         handle = quote(username, safe='')

         # Common social media platforms
         platforms = [
             {'name': 'GitHub', 'url': f'https://github.com/{handle}'},
             {'name': 'Twitter', 'url': f'https://twitter.com/{handle}'},
             {'name': 'Instagram', 'url': f'https://instagram.com/{handle}'},
             {'name': 'LinkedIn', 'url': f'https://linkedin.com/in/{handle}'},
             {'name': 'Facebook', 'url': f'https://facebook.com/{handle}'},
             {'name': 'YouTube', 'url': f'https://youtube.com/@{handle}'},
         ]

         # Probe every platform concurrently over one shared session.
         async with aiohttp.ClientSession() as session:
             platform_results = await asyncio.gather(
                 *(
                     self.check_profile(session, platform['url'], platform['name'])
                     for platform in platforms
                 )
             )
             results['platforms'] = [r for r in platform_results if r is not None]

         # Google search for additional mentions
         search_query = f'"{username}" OR "@{username}" -site:twitter.com -site:facebook.com -site:instagram.com'
         try:
             # google_search is a synchronous call; run it in the default
             # executor so it does not block the event loop while it waits.
             loop = asyncio.get_running_loop()
             results['websites'] = await loop.run_in_executor(
                 None,
                 lambda: list(google_search(search_query, num_results=5)),
             )
         except Exception as e:
             # Best-effort: a failed web search is reported in place of the
             # result list instead of failing the whole lookup.
             results['websites'] = [str(e)]

         return results
+    async def check_profile(self, session, url: str, platform: str) -> Dict[str, str]:
+        """Check if a profile exists on a platform"""
         try:
             async with session.get(url) as response:
                 if response.status == 200:
                     return {
+                        'platform': platform,
                         'url': url,
+                        'exists': True
                     }
         except:
             pass
         return None
     async def search_image(self, image_url: str) -> Dict[str, Any]:
         """Download an image, report basic properties, and build a Lens link.

         Args:
             image_url: URL of the image to download and analyse.

         Returns:
             A dict with three keys:

             * ``'analysis'``: ``{'format', 'size', 'mode'}`` read from the
               decoded image, or ``{}`` if the download or decode failed.
             * ``'similar_images'``: a list with one
               ``{'source': 'Google Lens', 'url': ...}`` entry, added only
               after the image was downloaded and decoded successfully.
             * ``'error'``: ``None`` on success, otherwise the exception
               message.
         """
         # Function-scope import so the block does not depend on a file-level
         # import that is not present.
         from urllib.parse import quote

         results = {
             'analysis': {},
             'similar_images': [],
             'error': None
         }

         try:
             # requests is synchronous; run the download in the default
             # executor so the event loop stays responsive. The timeout is
             # needed because requests waits indefinitely by default.
             loop = asyncio.get_running_loop()
             response = await loop.run_in_executor(
                 None,
                 lambda: requests.get(image_url, timeout=10),
             )
             # Report HTTP errors as such instead of as an image decode error.
             response.raise_for_status()

             # Basic image analysis; the context manager releases the image.
             with Image.open(BytesIO(response.content)) as img:
                 results['analysis'] = {
                     'format': img.format,
                     'size': img.size,
                     'mode': img.mode
                 }

             # Link to a reverse image search on Google Lens. The image URL
             # is percent-encoded so "&", "?" or "#" inside it cannot break
             # the surrounding query string.
             search_url = f"https://lens.google.com/uploadbyurl?url={quote(image_url, safe='')}"
             results['similar_images'].append({
                 'source': 'Google Lens',
                 'url': search_url
             })
         except Exception as e:
             results['error'] = str(e)

         return results
         """Gather personal information from various sources"""
         results = {}
         if 'location' in data:
             results['location'] = await self.analyze_location(data['location'])
         return results
     async def analyze_location(self, location: str) -> Dict[str, Any]:
         """Analyze location information"""
         try:
         except Exception as e:
             return {'error': str(e)}
     async def search_historical_data(self, url: str) -> List[Dict[str, Any]]:
         """Search for historical data using Wayback Machine"""
         results = []
         return results
 # Helper function to create document from gathered information
 def create_report(data: Dict[str, Any], template: str = "default") -> str:
     """Create a formatted report from gathered information"""
                     report += f"* {key}: {value}\n"
             elif isinstance(content, list):
                 for item in content:
+                    if isinstance(item, dict):
+                        for k, v in item.items():
+                            report += f"* {k}: {v}\n"
+                    else:
+                        report += f"* {item}\n"
             else:
                 report += f"{content}\n"
             report += "\n"

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 gradio==4.14.0
-torch==2.1.0
 transformers==4.35.2
 duckduckgo-search==3.9.3
 beautifulsoup4==4.12.2
@@ -11,18 +11,9 @@ protobuf==4.25.1
 pillow==10.1.0
 selenium==4.15.2
 webdriver-manager==4.0.1
-socid-extractor==0.0.24
-holehe==1.61
-sherlock3==0.1
-python-magic==0.4.27
-face-recognition==1.3.0
-opencv-python-headless==4.8.1.78
-googlesearch-python==1.2.3
-instaloader==4.10.1
-tweepy==4.14.0
-praw==7.7.1
-geopy==2.4.1
-phonenumbers==8.13.24
-python-whois==0.8.0
 aiohttp==3.9.1
 waybackpy==3.0.6

 gradio==4.14.0
+torch==2.1.0+cpu
 transformers==4.35.2
 duckduckgo-search==3.9.3
 beautifulsoup4==4.12.2
 pillow==10.1.0
 selenium==4.15.2
 webdriver-manager==4.0.1
 aiohttp==3.9.1
+python-whois==0.8.0
+geopy==2.4.1
+httpx==0.25.2
+googlesearch-python==1.2.3
 waybackpy==3.0.6