AlyanAkram commited on
Commit
7c4688e
·
verified ·
1 Parent(s): d090749

Update detector/custom_model.py

Browse files
Files changed (1) hide show
  1. detector/custom_model.py +9 -10
detector/custom_model.py CHANGED
@@ -8,36 +8,34 @@ from reportlab.pdfgen import canvas
8
  from reportlab.lib import colors
9
  import nltk
10
 
11
- # === Environment (no runtime directory creation) ===
12
  os.environ["HF_HOME"] = "/tmp/hf_home"
13
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_home"
14
  os.environ["NLTK_DATA"] = "/tmp/nltk_data"
15
-
16
- # Append pre-installed punkt path (you must ensure it's there)
17
  nltk.data.path.append("/tmp/nltk_data")
18
 
19
- # === Model loading: Hugging Face (Remote) vs Local ===
20
  USE_HF_MODEL = os.getenv("USE_HF_MODEL") == "1"
 
 
21
 
22
  if USE_HF_MODEL:
23
- hf_token = os.getenv("HF_TOKEN")
24
- MODEL_PATH = "AlyanAkram/stealth-roberta"
25
  tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, token=hf_token)
26
  model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, token=hf_token)
27
  else:
28
- MODEL_PATH = "./detector/models/roberta-detector"
29
  tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
30
  model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, local_files_only=True)
31
 
32
  model.eval().to("cuda" if torch.cuda.is_available() else "cpu")
33
  device = next(model.parameters()).device
34
 
35
- # === AI classification threshold ===
36
  AI_THRESHOLD = 0.5
37
-
38
- # === Output reports location (this must already be writable in /tmp) ===
39
  REPORT_DIR = Path("/tmp/reports")
40
 
 
41
  def analyze_text(text: str):
42
  results = []
43
  paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
@@ -67,6 +65,7 @@ def analyze_text(text: str):
67
  "results": results
68
  }
69
 
 
70
  def generate_pdf_report(results: dict, filename: str) -> str:
71
  REPORT_DIR.mkdir(exist_ok=True)
72
  pdf_path = REPORT_DIR / f"{filename}.pdf"
 
8
  from reportlab.lib import colors
9
  import nltk
10
 
11
+ # === Environment Setup ===
12
  os.environ["HF_HOME"] = "/tmp/hf_home"
13
  os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_home"
14
  os.environ["NLTK_DATA"] = "/tmp/nltk_data"
 
 
15
  nltk.data.path.append("/tmp/nltk_data")
16
 
17
+ # === Model Source (Hugging Face or Local) ===
18
  USE_HF_MODEL = os.getenv("USE_HF_MODEL") == "1"
19
+ hf_token = os.getenv("HF_TOKEN")
20
+ MODEL_PATH = "AlyanAkram/stealth-roberta" if USE_HF_MODEL else "./detector/models/roberta-detector"
21
 
22
  if USE_HF_MODEL:
23
+ print("🔐 Loading model from Hugging Face Hub...")
 
24
  tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, token=hf_token)
25
  model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, token=hf_token)
26
  else:
27
+ print("📁 Loading model from local files...")
28
  tokenizer = RobertaTokenizer.from_pretrained(MODEL_PATH, local_files_only=True)
29
  model = RobertaForSequenceClassification.from_pretrained(MODEL_PATH, local_files_only=True)
30
 
31
  model.eval().to("cuda" if torch.cuda.is_available() else "cpu")
32
  device = next(model.parameters()).device
33
 
34
+ # === Constants ===
35
  AI_THRESHOLD = 0.5
 
 
36
  REPORT_DIR = Path("/tmp/reports")
37
 
38
+ # === Main Analysis Function ===
39
  def analyze_text(text: str):
40
  results = []
41
  paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
 
65
  "results": results
66
  }
67
 
68
+ # === PDF Report Generator ===
69
  def generate_pdf_report(results: dict, filename: str) -> str:
70
  REPORT_DIR.mkdir(exist_ok=True)
71
  pdf_path = REPORT_DIR / f"{filename}.pdf"