Invescoz committed
Commit ee0fb52 · verified · 1 Parent(s): ce385a3

Update app.py

Files changed (1): app.py +32 -17
app.py CHANGED
@@ -1,24 +1,26 @@
 import gradio as gr
 import subprocess
 import sys
-import os
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from typing import Generator
 
-# Install llama-cpp-python at runtime if not found
+# Install transformers at runtime if not found
 try:
-    from llama_cpp import Llama
+    from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 except ImportError:
-    print("Installing llama-cpp-python...")
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python==0.2.85"])
-    from llama_cpp import Llama
+    print("Installing transformers...")
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers==4.44.2"])
+    from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 
-# Initialize model
-model_path = "tinyllama-1.1b-chat-v1.0.Q4_0.gguf"  # Downloaded from TinyLlama/TinyLlama-1.1B-Chat-v1.0-GGUF
-llm = Llama.from_pretrained(
-    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-    filename=model_path,
-    n_ctx=2048,  # Context length for prompts
-    n_threads=2  # Use 2 CPU cores
+# Initialize model and tokenizer
+model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",  # Offload to CPU
+    torch_dtype=torch.float16,  # Reduce memory usage
+    trust_remote_code=True
 )
 
 def generate_astrology_prediction(prompt: str) -> Generator[str, None, None]:
@@ -28,15 +30,28 @@ def generate_astrology_prediction(prompt: str) -> Generator[str, None, None]:
     system_prompt = (
         "You are an expert astrologer, specializing in fortune-telling. Given a user prompt "
         "containing details like zodiac sign, birth date, or specific questions, provide predictions "
-        "about their future, career, love life, and success. Stream the output line by line. "
+        "about their future, including career, love life, and success. Stream the output line by line. "
         "Use bullet points for key predictions and keep responses engaging and concise. "
-        "If the prompt is vague (e.g., 'Hi'), ask for more details like zodiac sign or birth date."
+        "If the prompt is vague (e.g., 'Hi'), respond with a request for more details like zodiac sign "
+        "or birth date, followed by a general prediction assuming a random zodiac sign (e.g., Libra)."
     )
     full_prompt = f"<|SYSTEM|> {system_prompt}\n<|USER|> {prompt}\n<|ASSISTANT|>"
 
+    # Tokenize input
+    inputs = tokenizer(full_prompt, return_tensors="pt").to("cpu")
+
     # Stream output
-    for output in llm(full_prompt, max_tokens=1000, temperature=0.7, top_p=0.9, stream=True):
-        content = output["choices"][0]["text"]
+    streamer = TextStreamer(tokenizer, skip_prompt=True)
+    for token in model.generate(
+        **inputs,
+        max_length=1000,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True,
+        streamer=streamer
+    ):
+        # Decode tokens as they are generated
+        content = tokenizer.decode(token, skip_special_tokens=True)
         if content:
             yield content
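
A note on the new streaming loop: model.generate() blocks until generation is complete and returns a tensor of finished sequences, so "for token in model.generate(...)" iterates over whole output rows rather than over tokens as they are produced, and TextStreamer only echoes text to stdout. For token-by-token streaming into a Gradio generator, transformers provides TextIteratorStreamer, which is consumed while generate() runs in a background thread. Below is a minimal sketch of that pattern, not part of this commit; it reuses the module-level model and tokenizer from app.py and swaps max_length for max_new_tokens so the prompt does not count against the output budget.

from threading import Thread
from typing import Generator

from transformers import TextIteratorStreamer

def generate_astrology_prediction(prompt: str) -> Generator[str, None, None]:
    system_prompt = "You are an expert astrologer..."  # same text as the commit
    full_prompt = f"<|SYSTEM|> {system_prompt}\n<|USER|> {prompt}\n<|ASSISTANT|>"
    inputs = tokenizer(full_prompt, return_tensors="pt")

    # The streamer receives token ids from generate() and yields decoded text
    # pieces; skip_prompt drops the echoed input, skip_special_tokens the tags.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            max_new_tokens=1000,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            streamer=streamer,
        ),
    )
    thread.start()
    for piece in streamer:  # blocks until the next decoded chunk is ready
        if piece:
            yield piece
    thread.join()

The iterator ends on its own once generate() finishes, so the loop exits cleanly and thread.join() just reclaims the worker.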
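
Separately, the hand-rolled <|SYSTEM|>/<|USER|>/<|ASSISTANT|> markers in full_prompt are not special tokens for this model; TinyLlama/TinyLlama-1.1B-Chat-v1.0 was tuned on a Zephyr-style chat format with lowercase <|system|>/<|user|>/<|assistant|> tags. Assuming the tokenizer on the Hub ships that chat template (worth verifying), apply_chat_template builds the prompt the model was actually trained on:

# Hypothetical alternative to the f-string prompt: let the tokenizer apply
# the model's own chat template instead of ad-hoc uppercase tags.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": prompt},
]
full_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a string, not token ids
    add_generation_prompt=True,  # end with the assistant tag so the model replies
)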