Update README.md
Browse files
README.md
CHANGED
|
@@ -32,7 +32,7 @@ This model is trained to identify SQL injection attacks, which are a type of cod
|
|
| 32 |
|
| 33 |
## Uses
|
| 34 |
|
| 35 |
-
###
|
| 36 |
|
| 37 |
To use the SQL Injection Classifier model, you can follow the code snippet below. This example demonstrates how to predict whether a given SQL query is normal or an injection attack.
|
| 38 |
|
|
@@ -95,44 +95,67 @@ This model was trained on a dataset of SQL queries and may exhibit certain limit
|
|
| 95 |
|
| 96 |
Users (both direct and downstream) should be aware of the potential risks of relying on the model in security-sensitive applications. Additional domain-specific testing and validation are recommended before deployment.
|
| 97 |
|
| 98 |
-
## How to Get Started with the Model
|
| 99 |
|
| 100 |
```python
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
from unsloth import FastLanguageModel
|
| 102 |
from transformers import AutoTokenizer
|
| 103 |
|
| 104 |
-
#
|
|
|
|
|
|
|
| 105 |
model_name = "shukdevdatta123/sql_injection_classifier_DeepSeek_R1_fine_tuned_model"
|
| 106 |
-
hf_token = "your hf tokens"
|
| 107 |
-
|
| 108 |
-
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 109 |
-
model_name=model_name,
|
| 110 |
-
load_in_4bit=True,
|
| 111 |
-
token=hf_token,
|
| 112 |
-
)
|
| 113 |
-
|
| 114 |
-
# Function for testing queries
|
| 115 |
-
def predict_sql_injection(query):
|
| 116 |
-
# Prepare the model for inference
|
| 117 |
-
inference_model = FastLanguageModel.for_inference(model)
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
```
|
| 137 |
## Training Details
|
| 138 |
|
|
|
|
| 32 |
|
| 33 |
## Uses
|
| 34 |
|
| 35 |
+
### Colab Use
|
| 36 |
|
| 37 |
To use the SQL Injection Classifier model, you can follow the code snippet below. This example demonstrates how to predict whether a given SQL query is normal or an injection attack.
|
| 38 |
|
|
|
|
| 95 |
|
| 96 |
Users (both direct and downstream) should be aware of the potential risks of relying on the model in security-sensitive applications. Additional domain-specific testing and validation are recommended before deployment.
|
| 97 |
|
| 98 |
+
## How to Get Started with the Model (Colab Streamlit)
|
| 99 |
|
| 100 |
```python
|
| 101 |
+
!pip install unsloth
|
| 102 |
+
|
| 103 |
+
%%writefile app.py
|
| 104 |
+
import streamlit as st
|
| 105 |
from unsloth import FastLanguageModel
|
| 106 |
from transformers import AutoTokenizer
|
| 107 |
|
| 108 |
+
# Streamlit UI for input
|
| 109 |
+
st.title("SQL Injection Classifier")
|
| 110 |
+
hf_token = st.text_input("Enter your Hugging Face Token", type="password")
|
| 111 |
model_name = "shukdevdatta123/sql_injection_classifier_DeepSeek_R1_fine_tuned_model"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
# Load the model and tokenizer once a Hugging Face token has been provided
|
| 114 |
+
if hf_token:
|
| 115 |
+
try:
|
| 116 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 117 |
+
model_name=model_name,
|
| 118 |
+
load_in_4bit=True,
|
| 119 |
+
token=hf_token,
|
| 120 |
+
)
|
| 121 |
+
|
| 122 |
+
# Function for testing queries
|
| 123 |
+
def predict_sql_injection(query):
|
| 124 |
+
# Prepare the model for inference
|
| 125 |
+
inference_model = FastLanguageModel.for_inference(model)
|
| 126 |
+
|
| 127 |
+
prompt = f"### Instruction:\nClassify the following SQL query as normal (0) or an injection attack (1).\n\n### Query:\n{query}\n\n### Classification:\n"
|
| 128 |
+
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
|
| 129 |
+
|
| 130 |
+
# Use the inference model for generation
|
| 131 |
+
outputs = inference_model.generate(
|
| 132 |
+
input_ids=inputs.input_ids,
|
| 133 |
+
attention_mask=inputs.attention_mask,
|
| 134 |
+
max_new_tokens=1000,
|
| 135 |
+
use_cache=True,
|
| 136 |
+
)
|
| 137 |
+
prediction = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
|
| 138 |
+
return prediction.split("### Classification:\n")[-1].strip()
|
| 139 |
+
|
| 140 |
+
# Input query from the user
|
| 141 |
+
query = st.text_area("Enter an SQL query to test for injection", "")
|
| 142 |
+
|
| 143 |
+
# Add a button to classify the query
|
| 144 |
+
if st.button("Classify SQL Injection"):
|
| 145 |
+
if query:
|
| 146 |
+
result = predict_sql_injection(query)
|
| 147 |
+
st.write(f"Prediction: {result}")
|
| 148 |
+
else:
|
| 149 |
+
st.write("Please enter a SQL query first.")
|
| 150 |
+
|
| 151 |
+
except Exception as e:
|
| 152 |
+
st.error(f"Error loading model: {str(e)}")
|
| 153 |
+
else:
|
| 154 |
+
st.write("Please enter your Hugging Face token to proceed.")
|
| 155 |
+
|
| 156 |
+
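# NOTE: run the commands below in a separate Colab cell; they must not be part of the %%writefile cell, or they will be written into app.py
!pip install streamlit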
|
| 157 |
+
|
| 158 |
+
!streamlit run app.py & npx localtunnel --port 8501
|
| 159 |
```
|
| 160 |
## Training Details
|
| 161 |
|