| import spacy | |
| import re | |
| from transformers import pipeline | |
| import gradio as gr | |
# Both models are loaded once at import time so every request reuses them.
print("Loading models...")

# Entity recognition is optional: when the spaCy model cannot be loaded, `nlp`
# is set to None and analyze_osint() skips PERSON/ORG/GPE extraction.
# Only the load call sits inside the `try`, so an OSError raised by anything
# else is never misreported as a model-loading failure.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError as e:
    print(f"Error loading SpaCy model: {e}")
    nlp = None
else:
    print("SpaCy model loaded successfully!")

# The summarizer is required; a failure here is deliberately left to propagate.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Report the real outcome instead of claiming success after a spaCy failure.
if nlp is None:
    print("Summarization model loaded; SpaCy entity extraction is disabled.")
else:
    print("Models loaded successfully!")
# Patterns for the entity kinds that are found by regex rather than spaCy.
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
_IPV4_RE = re.compile(r'\b\d{1,3}(?:\.\d{1,3}){3}\b')
# The lookbehind rejects an "@" that directly follows an e-mail local-part
# character, so the domain of "user@example.com" is not reported as a handle.
_HANDLE_RE = re.compile(r'(?<![\w.%+-])@(\w{4,15})\b')

# spaCy entity labels mapped to the labels used in the highlighted output.
_SPACY_LABELS = {"PERSON": "NAME", "ORG": "ORG", "GPE": "LOCATION"}


def _unique(items):
    """Return *items* as a list without duplicates, keeping first-seen order."""
    return list(dict.fromkeys(items))


def _is_valid_ipv4(candidate):
    """Return True when every dotted octet of *candidate* is at most 255."""
    return all(int(octet) <= 255 for octet in candidate.split("."))


def analyze_osint(text):
    """Summarize *text* and extract the entities found in it.

    Args:
        text: Free-form input text. ``None``, empty, or whitespace-only input
            is rejected with a prompt message.

    Returns:
        A ``(summary, entities)`` tuple. ``summary`` is the generated summary,
        a fixed notice when the text has 50 words or fewer, or a
        "Summarization error: ..." message when the summarizer raises.
        ``entities`` is a list of ``(value, label)`` tuples grouped in the
        order NAME, ORG, LOCATION, EMAIL, IP, SOCIAL. Within each group the
        values are de-duplicated and kept in order of first appearance.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze.", []

    if len(text.split()) > 50:
        try:
            # truncation=True clips input that exceeds the model's maximum
            # length instead of letting the call fail on long documents.
            summary_result = summarizer(
                text,
                max_length=150,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summary = summary_result[0]['summary_text']
        except Exception as e:
            # UI boundary: report the failure in the summary field and still
            # return the extracted entities rather than failing the request.
            summary = f"Summarization error: {e}"
    else:
        summary = "Text is too short to summarize."

    highlighted_entities = []

    # `nlp` is None when the spaCy model failed to load; in that case only the
    # regex-based entities below are reported.
    doc = nlp(text) if nlp else None
    if doc is not None:
        for spacy_label, ui_label in _SPACY_LABELS.items():
            found = _unique(
                ent.text for ent in doc.ents if ent.label_ == spacy_label
            )
            highlighted_entities.extend((item, ui_label) for item in found)

    emails = _unique(_EMAIL_RE.findall(text))
    # Drop dotted quads such as "999.1.1.1" that cannot be IPv4 addresses.
    ips = _unique(ip for ip in _IPV4_RE.findall(text) if _is_valid_ipv4(ip))
    socials = _unique(_HANDLE_RE.findall(text))

    for found, ui_label in ((emails, "EMAIL"), (ips, "IP"), (socials, "SOCIAL")):
        highlighted_entities.extend((item, ui_label) for item in found)

    return summary, highlighted_entities
# Highlight colour for each entity label that analyze_osint emits.
_ENTITY_COLORS = dict(
    NAME="red",
    ORG="blue",
    LOCATION="green",
    EMAIL="orange",
    IP="purple",
    SOCIAL="teal",
)

# Web UI: one multi-line text input, wired to analyze_osint, whose two return
# values fill the summary box and the highlighted-entity view in that order.
iface = gr.Interface(
    title="OSINT Analysis Tool",
    description="Enter any unstructured text to extract key entities and generate a summary.",
    fn=analyze_osint,
    inputs=gr.Textbox(
        label="OSINT Text",
        lines=10,
        placeholder="Paste your OSINT data here...",
    ),
    outputs=[
        gr.Textbox(label="Executive Summary"),
        gr.HighlightedText(label="Extracted Entities", color_map=_ENTITY_COLORS),
    ],
    allow_flagging="never",
)
def main():
    """Print the startup banner and launch the Gradio interface."""
    print("===== Application Startup =====")
    iface.launch()


# Start the app only when the file is run as a script, not when imported.
if __name__ == "__main__":
    main()