Spaces:

JohnSmith9982
/

ChuanhuChatGPT_Beta

Runtime error

App Files Files Community

ChuanhuChatGPT_Beta / modules /models /LLaMA.py

JohnSmith9982

Upload 166 files

458ae42 verified about 2 years ago

raw

history blame contribute delete

3.24 kB

	from __future__ import annotations

	import json
	import os
	from llama_cpp import Llama

	from ..index_func import *
	from ..presets import *
	from ..utils import *
	from .base_model import BaseLLMModel, download

	# Markers used by LLaMA_Client._get_llama_style_input to render the chat
	# history as a single LLaMA-style prompt string. All of them except
	# INST_POSTFIX are also passed as stop sequences by get_answer_stream_iter.
	# NOTE(review): these look like the Llama 2 chat template markers — confirm
	# against the model card of the model actually being loaded.
	# Wrap the content of a "system" history entry.
	SYS_PREFIX = "<<SYS>>\n"
	SYS_POSTFIX = "\n<</SYS>>\n\n"
	# Wrap the content of a "user" history entry.
	INST_PREFIX = "<s>[INST] "
	INST_POSTFIX = " "
	# Emitted right after a user entry, i.e. where the model's reply starts.
	OUTPUT_PREFIX = "[/INST] "
	# Appended after the content of every entry that is neither system nor user.
	OUTPUT_POSTFIX = "</s>"


	class LLaMA_Client(BaseLLMModel):
	    """Chat client that runs a local GGUF model through llama-cpp-python.

	    The model file is either downloaded (when ``model_name`` is listed in
	    ``MODEL_METADATA``) or discovered as the first ``.gguf`` file found under
	    ``models/<model_name>``. The conversation stored in ``self.history`` is
	    rendered into a single LLaMA-style prompt before every completion.
	    """

	    def __init__(
	        self,
	        model_name: str,
	        lora_path: str | None = None,
	        user_name: str = "",
	    ) -> None:
	        """Locate the model weights and load them with ``Llama``.

	        Args:
	            model_name: Key into ``MODEL_METADATA``, or the name of a
	                directory under ``models/`` that contains a ``.gguf`` file.
	            lora_path: Optional LoRA adapter path, relative to ``lora/``.
	            user_name: Forwarded to the base class as ``user``.

	        Raises:
	            FileNotFoundError: If ``model_name`` is not in ``MODEL_METADATA``
	                and no ``.gguf`` file exists under ``models/<model_name>``.
	        """
	        super().__init__(model_name=model_name, user=user_name)

	        self.max_generation_token = 1000
	        if model_name in MODEL_METADATA:
	            metadata = MODEL_METADATA[model_name]
	            path_to_model = download(metadata["repo_id"], metadata["filelist"][0])
	        else:
	            dir_to_model = os.path.join("models", model_name)
	            path_to_model = self._find_gguf(dir_to_model)
	            if path_to_model is None:
	                # Fail here with a clear message instead of handing
	                # model_path=None to Llama and getting an obscure error.
	                raise FileNotFoundError(
	                    f"No .gguf model file found under {dir_to_model!r}"
	                )
	        self.system_prompt = ""

	        llama_kwargs = {"model_path": path_to_model}
	        if lora_path is not None:
	            llama_kwargs["lora_path"] = os.path.join("lora", lora_path)
	        self.model = Llama(**llama_kwargs)

	    @staticmethod
	    def _find_gguf(directory: str) -> str | None:
	        """Return the first ``.gguf`` file under *directory*, or ``None``.

	        The directory and its subdirectories are searched in ``os.walk``
	        order.
	        """
	        return next(
	            (
	                os.path.join(root, name)
	                for root, _dirs, files in os.walk(directory)
	                for name in files
	                if name.endswith(".gguf")
	            ),
	            None,
	        )

	    def _get_llama_style_input(self) -> str:
	        """Render ``self.history`` as one LLaMA-style prompt string.

	        System entries are wrapped in the SYS markers, user entries in the
	        INST markers followed by the output prefix, and every other entry is
	        emitted as-is followed by the output postfix.
	        """
	        # NOTE(review): the system block is emitted before the "<s>[INST] "
	        # of the following user turn, not inside it — confirm this matches
	        # the chat template expected by the loaded model.
	        parts = []
	        for message in self.history:
	            role = message["role"]
	            content = message["content"]
	            if role == "system":
	                parts.append(SYS_PREFIX + content + SYS_POSTFIX)
	            elif role == "user":
	                parts.append(INST_PREFIX + content + INST_POSTFIX + OUTPUT_PREFIX)
	            else:
	                parts.append(content + OUTPUT_POSTFIX)
	        return "".join(parts)

	    def _complete(self, *, stream: bool):
	        """Run one completion over the current history.

	        The prompt markers are used as stop sequences in both modes so that
	        generation ends at the reply instead of continuing into further
	        marker-delimited turns.
	        """
	        return self.model(
	            self._get_llama_style_input(),
	            max_tokens=self.max_generation_token,
	            stop=[SYS_PREFIX, SYS_POSTFIX, INST_PREFIX, OUTPUT_PREFIX, OUTPUT_POSTFIX],
	            echo=False,
	            stream=stream,
	        )

	    def get_answer_at_once(self):
	        """Generate the whole reply in one call.

	        Returns:
	            A ``(text, length)`` tuple: the generated text and its length in
	            characters.
	        """
	        completion = self._complete(stream=False)
	        # The completion is a dict-like object; the generated text lives at
	        # the same place the streaming chunks expose it.
	        text = completion["choices"][0]["text"]
	        # NOTE(review): the second element is a character count, not a token
	        # count — confirm what the base class expects here.
	        return text, len(text)

	    def get_answer_stream_iter(self):
	        """Stream the reply, yielding the accumulated text after each chunk."""
	        partial_text = ""
	        for chunk in self._complete(stream=True):
	            partial_text += chunk["choices"][0]["text"]
	            yield partial_text