darkbat committed on
Commit
2fbfe1c
·
verified ·
1 Parent(s): fed3ac4

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +54 -0
  2. README.md +14 -13
  3. app.py +79 -0
  4. gitattributes +35 -0
  5. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM docker.io/library/python:3.10@sha256:7118d485696a1eb1105ae30e3f55e5685117a9bc0c3ffbe3830a268911e0837d

# Install fakeroot, wrap apt-get so later invocations go through it, and
# create the unprivileged runtime user (uid 1000).
RUN apt-get update && apt-get install -y fakeroot && \
    mv /usr/bin/apt-get /usr/bin/.apt-get && \
    echo '#!/usr/bin/env sh\nfakeroot /usr/bin/.apt-get $@' > /usr/bin/apt-get && \
    chmod +x /usr/bin/apt-get && \
    rm -rf /var/lib/apt/lists/* && \
    useradd -m -u 1000 user

# NOTE(review): this needs a build stage or named build context called "root"
# to be supplied by the build environment -- confirm it exists where this
# image is built.
COPY --chown=1000:1000 --from=root / /

# Base Python tooling shared by the image.
RUN pip install --no-cache-dir pip -U && \
    pip install --no-cache-dir \
    datasets \
    "huggingface-hub>=0.19" \
    "hf_xet>=1.0.0,<2.0.0" \
    "hf-transfer>=0.1.4" \
    "protobuf<4" \
    "click<8.1" \
    "pydantic~=1.0"

WORKDIR /home/user/app

# System libraries and tools.
RUN apt-get update && apt-get install -y \
    git git-lfs ffmpeg libsm6 libxext6 cmake rsync libgl1-mesa-glx && \
    rm -rf /var/lib/apt/lists/* && \
    git lfs install

# Node.js 20 from NodeSource.
RUN apt-get update && \
    apt-get install -y curl && \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/* && apt-get clean

# requirements.txt is expected at the root of the build context. It is copied
# on its own so the dependency layer is cached independently of the app code.
COPY requirements.txt /tmp/requirements.txt

# Install Python dependencies from requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt

# Download the spaCy model. spaCy 3 installs models as regular Python
# packages, so no data-path environment variable is required. The previous
# `ENV SPACY_DATA=$(python -c ...)` was removed: ENV does not execute shell
# command substitution, so it could never hold the intended path.
RUN python -m spacy download en_core_web_sm
# Fail the build early if the model cannot actually be loaded.
RUN python -c "import spacy; spacy.load('en_core_web_sm')"

# Copy the application code, owned by the runtime user so the app can write
# inside its own working directory.
COPY --chown=1000:1000 . /home/user/app

# Drop privileges for runtime.
USER user

# Command that starts the application (adjust if the main file is named differently)
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,13 +1,14 @@
1
- ---
2
- title: DarkTrawl
3
- emoji: 📈
4
- colorFrom: green
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.31.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
1
+ ---
2
+ title: My Osint Tool
3
+ emoji: 🦀
4
+ colorFrom: purple
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.31.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: OSINT model for information gathering
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ import re
3
+ from transformers import pipeline
4
+ import gradio as gr
5
+
6
print("Loading models...")

# The spaCy model is optional: when it cannot be found, `nlp` stays None and
# the failure is only reported on stdout.
nlp = None
try:
    nlp = spacy.load("en_core_web_sm")
except OSError as load_error:
    print(f"Error loading SpaCy model: {load_error}")
else:
    print("SpaCy model loaded successfully!")

# The summarization pipeline is loaded unconditionally; a failure here
# propagates and aborts start-up.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
print("Models loaded successfully!")
16
+
17
# Pre-compiled patterns for the regex-detectable entity types.
# The e-mail TLD class is [A-Za-z]; the former [A-Z|a-z] also accepted "|".
_EMAIL_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
_IPV4_RE = re.compile(r'\b\d{1,3}(?:\.\d{1,3}){3}\b')
# The look-behind rejects an "@" that follows an e-mail local-part character,
# so the domain of an address is not reported as a social handle.
_SOCIAL_RE = re.compile(r'(?<![\w.%+-])@\w{4,15}\b')

# spaCy entity label -> label shown in the UI.
_SPACY_LABELS = {"PERSON": "NAME", "ORG": "ORG", "GPE": "LOCATION"}


def _regex_spans(text):
    """Return (start, end, label) spans for e-mails, IPv4 addresses and @handles."""
    spans = [(m.start(), m.end(), "EMAIL") for m in _EMAIL_RE.finditer(text)]
    spans.extend(
        (m.start(), m.end(), "IP")
        for m in _IPV4_RE.finditer(text)
        # Discard dotted quads with an octet above 255 (e.g. "999.1.1.1").
        if all(int(octet) <= 255 for octet in m.group().split("."))
    )
    spans.extend((m.start(), m.end(), "SOCIAL") for m in _SOCIAL_RE.finditer(text))
    return spans


def _drop_overlaps(spans):
    """Keep the earliest (then longest) span wherever candidate spans overlap."""
    kept = []
    cursor = 0
    # Secondary key is the negated length, so longer spans sort first on ties.
    for start, end, label in sorted(spans, key=lambda s: (s[0], s[0] - s[1])):
        if start >= cursor:
            kept.append((start, end, label))
            cursor = end
    return kept


def analyze_osint(text):
    """Summarize free-form text and locate OSINT-relevant entities in it.

    Args:
        text: The raw text to analyze.

    Returns:
        A ``(summary, highlighted)`` pair. ``summary`` is the generated
        summary, or an explanatory message when the input is empty, too
        short (50 words or fewer) or summarization fails. ``highlighted`` is
        a dict ``{"text": str, "entities": [{"entity", "start", "end"}, ...]}``
        with non-overlapping character spans sorted by position. This is the
        dict form ``gr.HighlightedText`` accepts; the previous
        ``(text, [(start, end, label)])`` tuple and the bare ``{}`` returned
        for empty input were not valid values for that component.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze.", {"text": "", "entities": []}

    if len(text.split()) > 50:
        try:
            # truncation=True keeps inputs longer than the model's maximum
            # length from failing; only the leading part is summarized then.
            summary_result = summarizer(
                text, max_length=150, min_length=30, do_sample=False, truncation=True
            )
            summary = summary_result[0]['summary_text']
        except Exception as e:
            # UI boundary: report the failure to the user instead of crashing.
            summary = f"Summarization error: {str(e)}"
    else:
        summary = "Text is too short to summarize."

    spans = _regex_spans(text)
    # `nlp` is None when the spaCy model failed to load; NER is then skipped.
    if nlp is not None:
        # Use spaCy's own character offsets rather than re-searching the text
        # for the entity string, which also hit unrelated substrings.
        spans.extend(
            (ent.start_char, ent.end_char, _SPACY_LABELS[ent.label_])
            for ent in nlp(text).ents
            if ent.label_ in _SPACY_LABELS
        )

    entities = [
        {"entity": label, "start": start, "end": end}
        for start, end, label in _drop_overlaps(spans)
    ]
    return summary, {"text": text, "entities": entities}
57
+
58
# Gradio UI: one multi-line text input, and two outputs (the summary text and
# the input text with the extracted entities highlighted per label).
iface = gr.Interface(
    fn=analyze_osint,
    inputs=gr.Textbox(lines=10, label="OSINT Text", placeholder="Paste your OSINT data here..."),
    outputs=[
        gr.Textbox(label="Executive Summary"),
        gr.HighlightedText(
            label="Extracted Entities",
            # One colour per entity label.
            color_map={
                "NAME": "red",
                "ORG": "blue",
                "LOCATION": "green",
                "EMAIL": "orange",
                "IP": "purple",
                "SOCIAL": "teal",
            },
        ),
    ],
    title="OSINT Analysis Tool",
    description="Enter any unstructured text to extract key entities and generate a summary.",
    # `allow_flagging` is deprecated in Gradio 5; `flagging_mode` replaces it.
    flagging_mode="never",
)

if __name__ == "__main__":
    print("===== Application Startup =====")
    iface.launch()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==5.31.0
2
+ spacy==3.8.0
3
+ transformers==4.52.3
4
+ torch==2.7.0
5
+ sentencepiece==0.2.0