v0idalism committed
Commit e381b33 · verified · 1 Parent(s): b7d7da8

Update app.py

Files changed (1)
  1. app.py +38 -25
app.py CHANGED
@@ -1,15 +1,12 @@
 import gradio as gr
-from transformers import pipeline
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
 
-# Load the model
-generator = pipeline(
-    "text-generation",
-    model="Qwen/Qwen2.5-7B-Instruct",  # Change to your actual HF model
-    device_map="auto"
-)
+# Pick a SMALL, CPU-friendly model to guarantee it runs on free hardware.
+# You can swap to another small instruct model later if you like.
+MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
-# Persistent BLACKLIGHT style system prompt
-SYSTEM_PROMPT = (
+BLACKLIGHT_SYSTEM = (
     "System: You are BLACKLIGHT, created by v0id under AWAKEN CULT VISIONS. "
     "Always reply in the style of BLACKLIGHT: brutalist, minimal, precise.\n\n"
     "MODE: TRUTH\n"
@@ -17,34 +14,50 @@ SYSTEM_PROMPT = (
     "Avoid metaphors or flowery language.\n\n"
 )
 
-# Chat function
-def chat_with_blacklight(user_message):
-    if not user_message.strip():
+# ---- Load model on CPU (no accelerate, no device_map) ----
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.float32,  # CPU-safe dtype
+    low_cpu_mem_usage=False,    # avoid accelerate path
+    trust_remote_code=True
+)
+pipe = TextGenerationPipeline(model=model, tokenizer=tokenizer, device=-1)
+
+def chat(user_message: str):
+    user_message = (user_message or "").strip()
+    if not user_message:
         return "[Error: Empty prompt]"
+
+    prompt = f"{BLACKLIGHT_SYSTEM}User: {user_message}\nAssistant:"
     try:
-        full_prompt = f"{SYSTEM_PROMPT}User: {user_message}\nAssistant:"
-        result = generator(
-            full_prompt,
-            max_new_tokens=200,
+        out = pipe(
+            prompt,
+            max_new_tokens=192,
             temperature=0.7,
             top_p=0.9,
-            do_sample=True
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
         )
-        return result[0]["generated_text"].replace(full_prompt, "").strip()
+        # TextGenerationPipeline returns a list of dicts: [{'generated_text': '...'}]
+        full_text = out[0]["generated_text"]
+        # Return ONLY the assistant portion so your frontend gets clean text
+        reply = full_text.split("Assistant:", 1)[-1].strip()
+        return reply or "[Error: Model returned empty text]"
     except Exception as e:
-        return f"[Error: {str(e)}]"
+        return f"[Error: {e}]"
 
-# Create the Gradio interface
+# Gradio interface (simple single-text in/out ensures data[0] is a string)
 iface = gr.Interface(
-    fn=chat_with_blacklight,
-    inputs=gr.Textbox(lines=2, placeholder="Type your message..."),
+    fn=chat,
+    inputs=gr.Textbox(lines=2, placeholder="Type your message"),
     outputs=gr.Textbox(),
     title="BLACKLIGHT by v0id",
-    description="Brutalist • Minimal • Precise — Clinical analysis by BLACKLIGHT"
+    description="Brutalist • Minimal • Precise — Clinical analysis by BLACKLIGHT",
 )
 
-# Enable queue for slow model loading
+# Enable queue; let HF serve the app (no custom FastAPI needed)
 iface.queue()
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
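
A possible follow-up, not part of this commit: TinyLlama/TinyLlama-1.1B-Chat-v1.0 ships a chat template, so the prompt could be built with the tokenizer's apply_chat_template instead of f-string concatenation, keeping the system/user roles in the format the model was fine-tuned on. A minimal sketch, assuming the tokenizer and BLACKLIGHT_SYSTEM objects defined in the new app.py above:

    # Sketch only (hypothetical helper, not in the commit): format the prompt
    # via the model's own chat template rather than manual concatenation.
    def build_prompt(user_message: str) -> str:
        messages = [
            {"role": "system", "content": BLACKLIGHT_SYSTEM},
            {"role": "user", "content": user_message},
        ]
        # tokenize=False returns the formatted prompt as a string;
        # add_generation_prompt=True appends the assistant header so the
        # model continues in the assistant role.
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

With a template-formatted prompt, the reply would be extracted by slicing off the prompt prefix (full_text[len(prompt):].strip()) rather than splitting on "Assistant:", which also avoids leaking any extra "User:" turns the model samples after its answer.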