darkbat committed on
Commit
632a211
·
verified ·
1 Parent(s): 2fbfe1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -79
app.py CHANGED
@@ -1,79 +1,81 @@
1
- import spacy
2
- import re
3
- from transformers import pipeline
4
- import gradio as gr
5
-
6
- print("Loading models...")
7
- try:
8
- nlp = spacy.load("en_core_web_sm")
9
- print("SpaCy model loaded successfully!")
10
- except OSError as e:
11
- print(f"Error loading SpaCy model: {e}")
12
- nlp = None
13
-
14
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
15
- print("Models loaded successfully!")
16
-
17
- def analyze_osint(text):
18
- if not text:
19
- return "Please enter some text to analyze.", {}
20
- summary = ""
21
- if len(text.split()) > 50:
22
- try:
23
- summary_result = summarizer(text, max_length=150, min_length=30, do_sample=False)
24
- summary = summary_result[0]['summary_text']
25
- except Exception as e:
26
- summary = f"Summarization error: {str(e)}"
27
- else:
28
- summary = "Text is too short to summarize."
29
-
30
- doc = nlp(text) if nlp else None
31
- names = []
32
- orgs = []
33
- locations = []
34
- if doc:
35
- names = list(set([ent.text for ent in doc.ents if ent.label_ == 'PERSON']))
36
- orgs = list(set([ent.text for ent in doc.ents if ent.label_ == 'ORG']))
37
- locations = list(set([ent.text for ent in doc.ents if ent.label_ == 'GPE']))
38
-
39
- emails = list(set(re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)))
40
- ips = list(set(re.findall(r'\b\d{1,3}(?:\.\d{1,3}){3}\b', text)))
41
- socials = list(set(re.findall(r'@(\w{4,15})\b', text)))
42
-
43
- highlighted_entities = []
44
- def add_to_highlight(entities_list, label):
45
- for item in entities_list:
46
- for match in re.finditer(re.escape(item), text):
47
- highlighted_entities.append((match.start(), match.end(), label))
48
-
49
- add_to_highlight(names, "NAME")
50
- add_to_highlight(orgs, "ORG")
51
- add_to_highlight(locations, "LOCATION")
52
- add_to_highlight(emails, "EMAIL")
53
- add_to_highlight(ips, "IP")
54
- add_to_highlight(socials, "SOCIAL")
55
-
56
- return summary, (text, highlighted_entities)
57
-
58
- iface = gr.Interface(
59
- fn=analyze_osint,
60
- inputs=gr.Textbox(lines=10, label="OSINT Text", placeholder="Paste your OSINT data here..."),
61
- outputs=[
62
- gr.Textbox(label="Executive Summary"),
63
- gr.HighlightedText(label="Extracted Entities", color_map={
64
- "NAME": "red",
65
- "ORG": "blue",
66
- "LOCATION": "green",
67
- "EMAIL": "orange",
68
- "IP": "purple",
69
- "SOCIAL": "teal"
70
- })
71
- ],
72
- title="OSINT Analysis Tool",
73
- description="Enter any unstructured text to extract key entities and generate a summary.",
74
- allow_flagging="never"
75
- )
76
-
77
- if __name__ == "__main__":
78
- print("===== Application Startup =====")
79
- iface.launch()
 
 
 
1
+ import spacy
2
+ import re
3
+ from transformers import pipeline
4
+ import gradio as gr
5
+
6
+ print("Loading models...")
7
+ try:
8
+ nlp = spacy.load("en_core_web_sm")
9
+ print("SpaCy model loaded successfully!")
10
+ except OSError as e:
11
+ print(f"Error loading SpaCy model: {e}")
12
+ nlp = None
13
+
14
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
15
+ print("Models loaded successfully!")
16
+
17
+ def analyze_osint(text):
18
+ if not text:
19
+ return "Please enter some text to analyze.", []
20
+
21
+ summary = ""
22
+ if len(text.split()) > 50:
23
+ try:
24
+ summary_result = summarizer(text, max_length=150, min_length=30, do_sample=False)
25
+ summary = summary_result[0]['summary_text']
26
+ except Exception as e:
27
+ summary = f"Summarization error: {str(e)}"
28
+ else:
29
+ summary = "Text is too short to summarize."
30
+
31
+ doc = nlp(text) if nlp else None
32
+ names = []
33
+ orgs = []
34
+ locations = []
35
+ if doc:
36
+ names = list(set([ent.text for ent in doc.ents if ent.label_ == 'PERSON']))
37
+ orgs = list(set([ent.text for ent in doc.ents if ent.label_ == 'ORG']))
38
+ locations = list(set([ent.text for ent in doc.ents if ent.label_ == 'GPE']))
39
+
40
+ emails = list(set(re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)))
41
+ ips = list(set(re.findall(r'\b\d{1,3}(?:\.\d{1,3}){3}\b', text)))
42
+ socials = list(set(re.findall(r'@(\w{4,15})\b', text)))
43
+
44
+ highlighted_entities = []
45
+
46
+ def add_to_highlight(entities_list, label):
47
+ for item in entities_list:
48
+ highlighted_entities.append((item, label))
49
+
50
+ add_to_highlight(names, "NAME")
51
+ add_to_highlight(orgs, "ORG")
52
+ add_to_highlight(locations, "LOCATION")
53
+ add_to_highlight(emails, "EMAIL")
54
+ add_to_highlight(ips, "IP")
55
+ add_to_highlight(socials, "SOCIAL")
56
+
57
+ return summary, highlighted_entities
58
+
59
+ iface = gr.Interface(
60
+ fn=analyze_osint,
61
+ inputs=gr.Textbox(lines=10, label="OSINT Text", placeholder="Paste your OSINT data here..."),
62
+ outputs=[
63
+ gr.Textbox(label="Executive Summary"),
64
+ gr.HighlightedText(label="Extracted Entities", color_map={
65
+ "NAME": "red",
66
+ "ORG": "blue",
67
+ "LOCATION": "green",
68
+ "EMAIL": "orange",
69
+ "IP": "purple",
70
+ "SOCIAL": "teal"
71
+ })
72
+ ],
73
+ title="OSINT Analysis Tool",
74
+ description="Enter any unstructured text to extract key entities and generate a summary.",
75
+ allow_flagging="never"
76
+ )
77
+
78
+ if __name__ == "__main__":
79
+ print("===== Application Startup =====")
80
+ iface.launch()
81
+