Spaces: Runtime error
# FastAPI: much faster than a Flask API
from fastapi import FastAPI, Request
#import streamlit as st
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import numpy as np
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
#from langchain.memory.chat_message_histories import StreamlitChatMessageHistory
from langchain import HuggingFaceHub
import os
from dotenv import load_dotenv
load_dotenv()
from pathlib import Path
from huggingface_hub import InferenceClient
import requests
import uuid
import sys

hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
repo_id = os.environ.get('repo_id')
#port = os.getenv('port')
# Remote text-generation model hosted on the Hugging Face Hub
llm = HuggingFaceHub(
    repo_id=repo_id,
    #huggingfacehub_api_token="hf_p***K",
    huggingfacehub_api_token=hf_token,
    model_kwargs={"min_length": 1024,
                  "max_new_tokens": 5632,
                  "do_sample": True,
                  "temperature": 0.1,
                  "top_k": 50,
                  "top_p": 0.95,
                  "eos_token_id": 49155})
prompt_template = """
<<SYS>>You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
In each conversation, the question is placed after [INST] while your answer should be placed after [/INST].<</SYS>>
[INST] {user_question} [/INST]
assistant:
"""

llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_template))
app = FastAPI()

class ChatRequest(BaseModel):
    user_question: str

# The original listing defined chat() but never registered it as a route,
# so no request could ever reach it. The "/chat" path below is an assumption.
@app.post("/chat")
async def chat(request: Request, chat_request: ChatRequest):
    user_query = chat_request.user_question
    initial_response = llm_chain.run(user_query)
    return JSONResponse({'response': initial_response})
#if __name__ == '__main__':
#    import uvicorn
#    uvicorn.run(app, host='0.0.0.0')
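
A minimal way to exercise the endpoint once the app is being served (for example with `uvicorn app:app --host 0.0.0.0 --port 7860`); the port 7860 and the /chat path are assumptions here, not taken from the original listing, so adjust them to match your Space configuration.

# Hypothetical client-side check: POST a question to the running API and
# print the model's reply. Assumes port 7860 and a /chat route.
import requests

resp = requests.post(
    "http://localhost:7860/chat",
    json={"user_question": "What is FastAPI?"},
)
print(resp.json()["response"])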