Spaces:
Sleeping
Sleeping
| from ctransformers import AutoModelForCausalLM | |
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from llama_cpp import Llama | |
# Model artifact to load: a Q5_K_M-quantized GGUF build of TowerInstruct 7B.
_MODEL_REPO_ID = "TheBloke/TowerInstruct-7B-v0.1-GGUF"
_MODEL_FILENAME = "towerinstruct-7b-v0.1.Q5_K_M.gguf"

# Instantiate the model once at import time; the request handler below reuses
# this single module-level instance. `n_ctx` is passed through to the Llama
# constructor (context window size, per the llama-cpp-python API reference).
llm = Llama.from_pretrained(
    repo_id=_MODEL_REPO_ID,
    filename=_MODEL_FILENAME,
    n_ctx=4096,
)
# Pydantic model describing the request payload accepted by `stream`.
# The lowercase class name is kept as-is because other code refers to it.
class validation(BaseModel):
    # Text to translate; passed unchanged to translate_zh_to_en().
    prompt: str
# FastAPI application object for this module.
app = FastAPI()
def translate_zh_to_en(llm, text):
    """Translate Chinese ``text`` into English with one chat-completion call.

    Args:
        llm: Object exposing ``create_chat_completion(messages=...,
            temperature=..., max_tokens=...)``. Within this file the
            module-level ``llm`` instance is passed in.
        text: Source text; interpolated verbatim into the user prompt.

    Returns:
        The ``content`` value of the first choice's message in the response.

    Raises:
        KeyError, IndexError: If the response does not contain
            ``choices[0].message.content``.
    """
    prompt = f"Translate the following text from Chinese into English.\nChinese: {text}\nEnglish:"
    user_message = {"role": "user", "content": prompt}

    completion = llm.create_chat_completion(
        messages=[user_message],
        temperature=0.2,
        max_tokens=2048,
    )

    # Only the first returned choice is used.
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
| # <|im_start|>user | |
| # Translate the following text from Portuguese into English. | |
| # Portuguese: Um grupo de investigadores lançou um novo modelo para tarefas relacionadas com tradução. | |
| # English:<|im_end|> | |
| # <|im_start|>assistant | |
async def stream(item: validation):
    # Translate the prompt carried by the request body and return the result.
    # NOTE(review): no route decorator is visible on this coroutine in this
    # view of the file; confirm it is registered on `app`.
    # NOTE(review): translate_zh_to_en() is a plain synchronous call, so this
    # coroutine does not yield while the model is generating.
    source_text = item.prompt
    translation = translate_zh_to_en(llm, source_text)
    return translation