Invescoz committed
Commit ee0fb52 · verified · 1 Parent(s): ce385a3

Update app.py

Files changed (1): app.py +32 -17
app.py CHANGED
@@ -1,24 +1,26 @@
 import gradio as gr
 import subprocess
 import sys
-import os
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from typing import Generator
 
-# Install llama-cpp-python at runtime if not found
+# Install transformers at runtime if not found
 try:
-    from llama_cpp import Llama
+    from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 except ImportError:
-    print("Installing llama-cpp-python...")
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python==0.2.85"])
-    from llama_cpp import Llama
+    print("Installing transformers...")
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers==4.44.2"])
+    from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 
-# Initialize model
-model_path = "tinyllama-1.1b-chat-v1.0.Q4_0.gguf"  # Downloaded from TinyLlama/TinyLlama-1.1B-Chat-v1.0-GGUF
-llm = Llama.from_pretrained(
-    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-    filename=model_path,
-    n_ctx=2048,  # Context length for prompts
-    n_threads=2  # Use 2 CPU cores
+# Initialize model and tokenizer
+model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",  # Offload to CPU
+    torch_dtype=torch.float16,  # Reduce memory usage
+    trust_remote_code=True
 )
 
 def generate_astrology_prediction(prompt: str) -> Generator[str, None, None]:
@@ -28,15 +30,28 @@ def generate_astrology_prediction(prompt: str) -> Generator[str, None, None]:
     system_prompt = (
         "You are an expert astrologer, specializing in fortune-telling. Given a user prompt "
         "containing details like zodiac sign, birth date, or specific questions, provide predictions "
-        "about their future, career, love life, and success. Stream the output line by line. "
+        "about their future, including career, love life, and success. Stream the output line by line. "
         "Use bullet points for key predictions and keep responses engaging and concise. "
-        "If the prompt is vague (e.g., 'Hi'), ask for more details like zodiac sign or birth date."
+        "If the prompt is vague (e.g., 'Hi'), respond with a request for more details like zodiac sign "
+        "or birth date, followed by a general prediction assuming a random zodiac sign (e.g., Libra)."
     )
     full_prompt = f"<|SYSTEM|> {system_prompt}\n<|USER|> {prompt}\n<|ASSISTANT|>"
 
+    # Tokenize input
+    inputs = tokenizer(full_prompt, return_tensors="pt").to("cpu")
+
     # Stream output
-    for output in llm(full_prompt, max_tokens=1000, temperature=0.7, top_p=0.9, stream=True):
-        content = output["choices"][0]["text"]
+    streamer = TextStreamer(tokenizer, skip_prompt=True)
+    for token in model.generate(
+        **inputs,
+        max_length=1000,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True,
+        streamer=streamer
+    ):
+        # Decode tokens as they are generated
+        content = tokenizer.decode(token, skip_special_tokens=True)
         if content:
             yield content
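
A note on the new streaming loop: model.generate() blocks until generation is complete and returns a tensor of finished sequences, so "for token in model.generate(...)" iterates over whole output rows rather than over tokens as they are produced, and TextStreamer only echoes text to stdout. For token-by-token streaming into a Gradio generator, transformers provides TextIteratorStreamer, which is consumed while generate() runs in a background thread. Below is a minimal sketch of that pattern, not part of this commit; it reuses the module-level model and tokenizer from app.py and swaps max_length for max_new_tokens so the prompt does not count against the output budget.

from threading import Thread
from typing import Generator

from transformers import TextIteratorStreamer

def generate_astrology_prediction(prompt: str) -> Generator[str, None, None]:
    system_prompt = "You are an expert astrologer..."  # same text as the commit
    full_prompt = f"<|SYSTEM|> {system_prompt}\n<|USER|> {prompt}\n<|ASSISTANT|>"
    inputs = tokenizer(full_prompt, return_tensors="pt")

    # The streamer receives token ids from generate() and yields decoded text
    # pieces; skip_prompt drops the echoed input, skip_special_tokens the tags.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            max_new_tokens=1000,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            streamer=streamer,
        ),
    )
    thread.start()
    for piece in streamer:  # blocks until the next decoded chunk is ready
        if piece:
            yield piece
    thread.join()

The iterator ends on its own once generate() finishes, so the loop exits cleanly and thread.join() just reclaims the worker.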
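
Separately, the hand-rolled <|SYSTEM|>/<|USER|>/<|ASSISTANT|> markers in full_prompt are not special tokens for this model; TinyLlama/TinyLlama-1.1B-Chat-v1.0 was tuned on a Zephyr-style chat format with lowercase <|system|>/<|user|>/<|assistant|> tags. Assuming the tokenizer on the Hub ships that chat template (worth verifying), apply_chat_template builds the prompt the model was actually trained on:

# Hypothetical alternative to the f-string prompt: let the tokenizer apply
# the model's own chat template instead of ad-hoc uppercase tags.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": prompt},
]
full_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a string, not token ids
    add_generation_prompt=True,  # end with the assistant tag so the model replies
)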