Spaces:

NotRev
/

ThesisPlease

Sleeping

NotRev committed on Dec 11, 2025

Commit

cffdf8b

verified ·

1 Parent(s): e7e7f38

Update src/streamlit_app.py

Files changed (1) hide show

src/streamlit_app.py CHANGED Viewed

@@ -1,35 +1,34 @@
 import json, re, ast, streamlit as st
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
-import os # Necessary to read the HF_TOKEN from environment variables
-# Model ID for the small, structured Gemma model
-model_id = "google/gemma-2b-it"
-# 1. READ THE TOKEN: Get the Hugging Face Token from the Space Secrets
-HF_TOKEN = os.environ.get("HF_TOKEN")
-# 2. PASS THE TOKEN to the tokenizer loading
-tok = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
-# Simplified Model Loading (trying to avoid complex quantization)
 try:
-    # Attempt to load using bfloat16 for efficiency
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.bfloat16,
         device_map="auto",
-        token=HF_TOKEN # PASS THE TOKEN to the model loading
     )
 except Exception:
-    # Fallback to float16 if bfloat16 is not supported
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.float16,
         device_map="auto",
-        token=HF_TOKEN
     )
 gen = pipeline("text-generation", model=model, tokenizer=tok,
                max_new_tokens=256, do_sample=False, return_full_text=False)
@@ -58,6 +57,8 @@ def extract(text: str):
                 continue
     if not isinstance(data, dict):
         return {
             "SKILL": ["(Error: Invalid/Corrupted Model Output)"],
             "KNOWLEDGE": [],

 import json, re, ast, streamlit as st
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
+import os
+# NEW MODEL: Phi-2 - Does NOT use sentencepiece
+model_id = "microsoft/phi-2"
+# Token is NOT needed for Phi-2
+# HF_TOKEN = os.environ.get("HF_TOKEN") # Removed
+tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+# Model loading remains the same
 try:
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.bfloat16,
         device_map="auto",
+        trust_remote_code=True
+        # token=HF_TOKEN # Removed
     )
 except Exception:
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         torch_dtype=torch.float16,
         device_map="auto",
+        trust_remote_code=True
+        # token=HF_TOKEN # Removed
     )
+# ... rest of the pipeline and extraction code is the same ...
 gen = pipeline("text-generation", model=model, tokenizer=tok,
                max_new_tokens=256, do_sample=False, return_full_text=False)
                 continue
     if not isinstance(data, dict):
+        # NOTE: a KeyError: "SKILL" (see image_36e619.png) was observed when the
+        # model returned malformed JSON. The fallback below handles that case:
         return {
             "SKILL": ["(Error: Invalid/Corrupted Model Output)"],
             "KNOWLEDGE": [],