File size: 2,587 Bytes
3459d0e
 
 
 
 
 
 
fe4f27c
b0d92ea
fe4f27c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e834ef6
fe4f27c
ef5dcbd
fe4f27c
 
3459d0e
e834ef6
fe4f27c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3671cba
 
fe4f27c
 
 
 
 
 
 
 
c2dcbf1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# This works! Example client call against the deployed API:
#url = 'https://binqiangliu-fastapi-in-docker.hf.space/api/chat'
#data = {'user_question': "Tell me a joke"}
#response = requests.post(url, json=data)
#result = response.json()


from fastapi import FastAPI, Request
import streamlit as st
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from langchain import PromptTemplate, LLMChain
import numpy as np
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.memory.chat_message_histories import StreamlitChatMessageHistory
from langchain import HuggingFaceHub
import os
from dotenv import load_dotenv
load_dotenv()
from pathlib import Path
from huggingface_hub import InferenceClient
from langchain import HuggingFaceHub
import requests
import uuid
import sys

# Hugging Face credentials and the model repository are read from the
# environment; either value is None when its variable is not set.
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
repo_id = os.getenv('repo_id')

# Generation parameters handed to HuggingFaceHub as ``model_kwargs``.
# NOTE(review): eos_token_id=49155 looks model-specific -- confirm it matches
# the tokenizer of whatever model ``repo_id`` points at.
_generation_kwargs = {
    "min_length": 1024,
    "max_new_tokens": 5632,
    "do_sample": True,
    "temperature": 0.1,
    "top_k": 50,
    "top_p": 0.95,
    "eos_token_id": 49155,
}

# LLM wrapper shared by the whole module.
llm = HuggingFaceHub(
    repo_id=repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs=_generation_kwargs,
)

# Prompt sent to the model; ``{user_question}`` is the only placeholder and is
# filled in with the caller's question.
prompt_template = """
<<SYS>>You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
In each conversation, question is placed after [INST] while your answer should be placed after [/INST].<</SYS>>
[INST] {user_question} [/INST]
assistant:
"""

# Build the prompt object first, then bind it to the module-level ``llm``.
_chat_prompt = PromptTemplate.from_template(prompt_template)
llm_chain = LLMChain(prompt=_chat_prompt, llm=llm)

app = FastAPI()


class ChatRequest(BaseModel):
    """JSON body accepted by ``POST /api/chat``."""

    # The question that is passed to the LLM chain.
    user_question: str


@app.post('/api/chat')
async def chat(request: Request, chat_request: ChatRequest):
    """Answer a single user question with the module-level LLM chain.

    Args:
        request: The raw incoming request. It is not used by the handler and
            is kept only so the signature stays unchanged.
        chat_request: Validated request body carrying ``user_question``.

    Returns:
        A ``JSONResponse`` of the form ``{"response": <chain output>}``.
    """
    # Imported locally so the handler is self-contained and does not rely on
    # a change to the file-level import block.
    from fastapi.concurrency import run_in_threadpool

    user_query = chat_request.user_question
    # ``llm_chain.run`` is synchronous; calling it directly inside an
    # ``async def`` handler would block the event loop (and every other
    # request) until the model answers, so run it in a worker thread.
    initial_response = await run_in_threadpool(llm_chain.run, user_query)
    return JSONResponse({'response': initial_response})

#if __name__ == '__main__':
    #uvicorn.run(app, host='0.0.0.0')