Basu03 committed on
Commit
415f16d
·
1 Parent(s): 8ed3f2f

used lru caching

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
src/local_llm_handler.py CHANGED
@@ -2,12 +2,12 @@
2
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
- import streamlit as st
6
-
7
  import os
 
8
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
9
 
10
- @st.cache_resource
11
  def load_llm_pipeline():
12
  """
13
  Loads and caches the local LLM pipeline using Phi-3-mini-4k-instruct.
@@ -20,8 +20,8 @@ def load_llm_pipeline():
20
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
21
  model = AutoModelForCausalLM.from_pretrained(
22
  model_name,
23
- device_map="auto", # Automatically uses GPU if available
24
- torch_dtype=torch.float32, # Use float16 for memory efficiency
25
  trust_remote_code=True
26
  )
27
 
@@ -37,7 +37,6 @@ def load_llm_pipeline():
37
  print("--- Phi-3-mini model loaded successfully ---")
38
  return llm_pipeline
39
 
40
-
41
  def get_llm_response(prompt: str) -> str:
42
  """
43
  Gets a response from the cached Phi-3-mini LLM pipeline.
 
2
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
+ from functools import lru_cache
 
6
  import os
7
+
8
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
9
 
10
+ @lru_cache(maxsize=1)
11
  def load_llm_pipeline():
12
  """
13
  Loads and caches the local LLM pipeline using Phi-3-mini-4k-instruct.
 
20
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
21
  model = AutoModelForCausalLM.from_pretrained(
22
  model_name,
23
+ device_map="auto",
24
+ torch_dtype=torch.float32,
25
  trust_remote_code=True
26
  )
27
 
 
37
  print("--- Phi-3-mini model loaded successfully ---")
38
  return llm_pipeline
39
 
 
40
  def get_llm_response(prompt: str) -> str:
41
  """
42
  Gets a response from the cached Phi-3-mini LLM pipeline.
src/perplexity_detector.py CHANGED
@@ -2,9 +2,9 @@
2
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
- import streamlit as st
6
 
7
- @st.cache_resource
8
  def load_detector_model():
9
  """Loads and caches the gpt-2 model for perplexity calculation."""
10
  print("--- Loading detector model (gpt-2) for the first time... ---")
@@ -28,4 +28,4 @@ def is_ai_generated(text: str, threshold: float = 45.0) -> bool:
28
  print("AI: (Calculating perplexity...)")
29
  perplexity = calculate_perplexity(text)
30
  print(f"AI: (Perplexity score: {perplexity:.2f}, Threshold: {threshold})")
31
- return perplexity < threshold
 
2
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
+ from functools import lru_cache
6
 
7
+ @lru_cache(maxsize=1)
8
  def load_detector_model():
9
  """Loads and caches the gpt-2 model for perplexity calculation."""
10
  print("--- Loading detector model (gpt-2) for the first time... ---")
 
28
  print("AI: (Calculating perplexity...)")
29
  perplexity = calculate_perplexity(text)
30
  print(f"AI: (Perplexity score: {perplexity:.2f}, Threshold: {threshold})")
31
+ return perplexity < threshold