Basu03 committed on
Commit
415f16d
·
1 Parent(s): 8ed3f2f

used lru caching

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
src/local_llm_handler.py CHANGED
@@ -2,12 +2,12 @@
2
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
- import streamlit as st
6
-
7
  import os
 
8
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
9
 
10
- @st.cache_resource
11
  def load_llm_pipeline():
12
  """
13
  Loads and caches the local LLM pipeline using Phi-3-mini-4k-instruct.
@@ -20,8 +20,8 @@ def load_llm_pipeline():
20
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
21
  model = AutoModelForCausalLM.from_pretrained(
22
  model_name,
23
- device_map="auto", # Automatically uses GPU if available
24
- torch_dtype=torch.float32, # Use float16 for memory efficiency
25
  trust_remote_code=True
26
  )
27
 
@@ -37,7 +37,6 @@ def load_llm_pipeline():
37
  print("--- Phi-3-mini model loaded successfully ---")
38
  return llm_pipeline
39
 
40
-
41
  def get_llm_response(prompt: str) -> str:
42
  """
43
  Gets a response from the cached Phi-3-mini LLM pipeline.
 
2
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
+ from functools import lru_cache
 
6
  import os
7
+
8
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
9
 
10
+ @lru_cache(maxsize=1)
11
  def load_llm_pipeline():
12
  """
13
  Loads and caches the local LLM pipeline using Phi-3-mini-4k-instruct.
 
20
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
21
  model = AutoModelForCausalLM.from_pretrained(
22
  model_name,
23
+ device_map="auto",
24
+ torch_dtype=torch.float32,
25
  trust_remote_code=True
26
  )
27
 
 
37
  print("--- Phi-3-mini model loaded successfully ---")
38
  return llm_pipeline
39
 
 
40
  def get_llm_response(prompt: str) -> str:
41
  """
42
  Gets a response from the cached Phi-3-mini LLM pipeline.
src/perplexity_detector.py CHANGED
@@ -2,9 +2,9 @@
2
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
- import streamlit as st
6
 
7
- @st.cache_resource
8
  def load_detector_model():
9
  """Loads and caches the gpt-2 model for perplexity calculation."""
10
  print("--- Loading detector model (gpt-2) for the first time... ---")
@@ -28,4 +28,4 @@ def is_ai_generated(text: str, threshold: float = 45.0) -> bool:
28
  print("AI: (Calculating perplexity...)")
29
  perplexity = calculate_perplexity(text)
30
  print(f"AI: (Perplexity score: {perplexity:.2f}, Threshold: {threshold})")
31
- return perplexity < threshold
 
2
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ from functools import lru_cache
6
 
7
+ @lru_cache(maxsize=1)
8
  def load_detector_model():
9
  """Loads and caches the gpt-2 model for perplexity calculation."""
10
  print("--- Loading detector model (gpt-2) for the first time... ---")
 
28
  print("AI: (Calculating perplexity...)")
29
  perplexity = calculate_perplexity(text)
30
  print(f"AI: (Perplexity score: {perplexity:.2f}, Threshold: {threshold})")
31
+ return perplexity < threshold