Spaces:
Paused
Paused
from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import streamlit as st
# ChatML-style prompt template. Callers fill in {system_message} and {prompt};
# the final assistant tag is deliberately left open so the model generates
# its reply as the continuation.
prompt_format = "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
# Cache of loaded models keyed by (repo, filename, model_type, gpu_layers) so
# repeated calls (e.g. Streamlit reruns) do not reload the weights from disk
# every time — the original reloaded the full model on every invocation.
_model_cache = {}


def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
    """Run ``prompt`` through a ctransformers model and return its output.

    Parameters
    ----------
    repo : str
        Hugging Face repo id (or local path) to load the model from.
    filename : str
        Model file name inside the repo (e.g. a ``.gguf`` file).
    model_type : str
        ctransformers architecture type (e.g. ``"mistral"``, ``"llama"``).
    gpu_layers : int
        Number of layers to offload to GPU (0 = CPU only).
    prompt : str
        Full prompt text passed verbatim to the model; any chat template
        (see ``prompt_format``) must be applied by the caller.

    Returns
    -------
    str
        The model's generated text.
    """
    cache_key = (repo, filename, model_type, gpu_layers)
    model = _model_cache.get(cache_key)
    if model is None:
        print("Loading model")
        model = AutoModelForCausalLM.from_pretrained(
            repo, model_file=filename, model_type=model_type, gpu_layers=gpu_layers
        )
        _model_cache[cache_key] = model
        print("Model loaded")
    print(f"LLM prompt: {prompt}")
    # NOTE(review): "</s>" may not be the correct stop token for every model
    # loaded through this function — confirm against the model's tokenizer.
    response = model(prompt, stop=["</s>"])
    print(f"Response: {response}")
    return response