File size: 2,571 Bytes
632a211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import spacy
import re
from transformers import pipeline
import gradio as gr

# Both models are loaded once, at import time, and reused by every request.
print("Loading models...")

# spaCy is treated as optional: if loading raises OSError, `nlp` is set to
# None so the rest of the module can still be imported.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError as e:
    print(f"Error loading SpaCy model: {e}")
    nlp = None
else:
    print("SpaCy model loaded successfully!")

# The summarizer has no fallback: an exception here propagates and stops
# the module from loading.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)
print("Models loaded successfully!")

# Patterns are compiled once at import time instead of on every request.
# NOTE: the TLD class is [A-Za-z]; a "|" inside a character class is a
# literal pipe, not an alternation.
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
_IPV4_RE = re.compile(r'\b\d{1,3}(?:\.\d{1,3}){3}\b')
# The lookbehind rejects an "@" that directly follows an e-mail local-part
# character, so the domain of an address is not reported as a handle.
_SOCIAL_RE = re.compile(r'(?<![\w.%+-])@(\w{4,15})\b')


def _unique(items):
    """Return *items* de-duplicated, keeping first-seen order."""
    return list(dict.fromkeys(items))


def _is_valid_ipv4(candidate):
    """Return True when every dotted octet of *candidate* is at most 255."""
    return all(int(octet) <= 255 for octet in candidate.split("."))


def analyze_osint(text):
    """Summarize *text* and extract OSINT-relevant entities from it.

    Args:
        text: Free-form text to analyze. ``None``, empty, and
            whitespace-only values are treated as "no input".

    Returns:
        A ``(summary, entities)`` tuple. ``summary`` is the generated
        summary, a notice that the text is too short (50 words or fewer),
        or a ``"Summarization error: ..."`` message. ``entities`` is a list
        of ``(entity_text, label)`` tuples with labels ``NAME``, ``ORG``,
        ``LOCATION``, ``EMAIL``, ``IP`` and ``SOCIAL``, grouped in that
        order; within a group, values are unique and keep the order in
        which they first appear in *text*.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze.", []

    if len(text.split()) > 50:
        try:
            # truncation=True clips inputs that exceed the model's maximum
            # input length instead of letting the call fail on long text.
            summary_result = summarizer(
                text,
                max_length=150,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summary = summary_result[0]['summary_text']
        except Exception as e:
            # Deliberate best-effort: a summarizer failure is reported in
            # the summary field so entity extraction still runs.
            summary = f"Summarization error: {e}"
    else:
        summary = "Text is too short to summarize."

    names, orgs, locations = [], [], []
    # `nlp` is None when the spaCy model could not be loaded at startup.
    if nlp is not None:
        buckets = {"PERSON": names, "ORG": orgs, "GPE": locations}
        # Single pass over the entities instead of one scan per label.
        for ent in nlp(text).ents:
            if ent.label_ in buckets:
                buckets[ent.label_].append(ent.text)

    emails = _EMAIL_RE.findall(text)
    # The pattern alone accepts octets such as 999, so validate the range.
    ips = [ip for ip in _IPV4_RE.findall(text) if _is_valid_ipv4(ip)]
    socials = _SOCIAL_RE.findall(text)

    groups = (
        ("NAME", names),
        ("ORG", orgs),
        ("LOCATION", locations),
        ("EMAIL", emails),
        ("IP", ips),
        ("SOCIAL", socials),
    )
    highlighted_entities = [
        (item, label)
        for label, items in groups
        for item in _unique(items)
    ]

    return summary, highlighted_entities

# Web UI: one multi-line text input wired to analyze_osint, whose two return
# values feed the summary box and the colour-coded entity list.
iface = gr.Interface(
    fn=analyze_osint,
    inputs=gr.Textbox(
        lines=10,
        label="OSINT Text",
        placeholder="Paste your OSINT data here...",
    ),
    outputs=[
        gr.Textbox(label="Executive Summary"),
        gr.HighlightedText(
            label="Extracted Entities",
            # One colour per label emitted by analyze_osint.
            color_map=dict(
                NAME="red",
                ORG="blue",
                LOCATION="green",
                EMAIL="orange",
                IP="purple",
                SOCIAL="teal",
            ),
        ),
    ],
    title="OSINT Analysis Tool",
    description="Enter any unstructured text to extract key entities and generate a summary.",
    allow_flagging="never",
)

# Start the server only when the file is run as a script, not on import.
if __name__ == "__main__":
    print("===== Application Startup =====")
    iface.launch()