OrbitMC committed on
Commit
5191be0
·
verified ·
1 Parent(s): 6894d69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -3,17 +3,16 @@ import io
3
  import base64
4
  from flask import Flask, request, jsonify
5
  from huggingface_hub import hf_hub_download
6
- from llama_cpp import Llama
7
  from kittentts import KittenTTS
8
  import soundfile as sf
9
 
10
  app = Flask(__name__)
11
 
12
- # Load models
13
  MODEL_REPO = "unsloth/gemma-3-270m-it-GGUF"
14
  MODEL_FILE = "gemma-3-270m-it-F16.gguf"
15
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir="models")
16
- llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4, n_gpu_layers=0, verbose=False)
17
  tts = KittenTTS("KittenML/kitten-tts-nano-0.8-int8")
18
 
19
  HTML = """<!DOCTYPE html>
@@ -56,7 +55,11 @@ def index():
56
  @app.route('/api/chat', methods=['POST'])
57
  def chat():
58
  user_msg = request.json['message']
59
- response = llm.create_chat_completion(messages=[{"role": "user", "content": user_msg}], max_tokens=512, temperature=0.7)['choices'][0]['message']['content']
 
 
 
 
60
  audio = tts.generate(text=response, voice="Kiki")
61
  buf = io.BytesIO()
62
  sf.write(buf, audio, 24000, format='WAV')
 
3
  import base64
4
  from flask import Flask, request, jsonify
5
  from huggingface_hub import hf_hub_download
6
+ from ctransformers import AutoModelForCausalLM
7
  from kittentts import KittenTTS
8
  import soundfile as sf
9
 
10
  app = Flask(__name__)
11
 
 
12
  MODEL_REPO = "unsloth/gemma-3-270m-it-GGUF"
13
  MODEL_FILE = "gemma-3-270m-it-F16.gguf"
14
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir="models")
15
+ llm = AutoModelForCausalLM.from_pretrained(model_path, model_type="gemma", context_length=2048)
16
  tts = KittenTTS("KittenML/kitten-tts-nano-0.8-int8")
17
 
18
  HTML = """<!DOCTYPE html>
 
55
  @app.route('/api/chat', methods=['POST'])
56
  def chat():
57
  user_msg = request.json['message']
58
+ prompt = f"""<bos><start_of_turn>user
59
+ {user_msg}<end_of_turn>
60
+ <start_of_turn>model
61
+ """
62
+ response = llm(prompt, max_new_tokens=512, temperature=0.7, stop=["<end_of_turn>"])
63
  audio = tts.generate(text=response, voice="Kiki")
64
  buf = io.BytesIO()
65
  sf.write(buf, audio, 24000, format='WAV')