File size: 3,510 Bytes
471dd95
 
72e7e3c
 
4aef1af
6ea2900
3d01049
c700750
b9a2219
6ea2900
4aef1af
e79a89e
471dd95
 
 
 
 
 
 
 
 
a8f04c2
 
4aef1af
a8f04c2
 
 
4aef1af
be588c3
 
4aef1af
 
a8f04c2
 
c4bc4d2
72e7e3c
 
a8f04c2
 
be588c3
3d01049
be588c3
 
 
 
 
 
3d01049
6ea2900
3e22d29
be588c3
 
 
 
 
3e22d29
be588c3
 
3e22d29
be588c3
 
 
 
 
 
 
 
 
a8f04c2
be588c3
 
 
a8f04c2
be588c3
 
 
a8f04c2
be588c3
 
a8f04c2
471dd95
 
c4bc4d2
28ff6a7
a8f04c2
 
 
 
 
28ff6a7
 
be588c3
 
 
28ff6a7
be588c3
 
 
 
 
9a8de5e
 
a8f04c2
 
 
 
ab77d93
a8f04c2
ab77d93
a8f04c2
ab77d93
a8f04c2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
import requests
# from pydantic import BaseModel

# from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# Values read from the `huggingface_key` / `openai_key` environment variables
# (None when unset). In the visible part of this file they are only referenced
# by commented-out code.
key = os.getenv("huggingface_key")
openai_api_key = os.getenv("openai_key")

# The OpenAPI schema and the interactive docs are both served under the
# /api/v1/sparrow-data prefix instead of FastAPI's default locations.
app = FastAPI(
    docs_url="/api/v1/sparrow-data/docs",
    openapi_url="/api/v1/sparrow-data/openapi.json",
)

# NOTE(review): every origin, method and header is allowed *and* credentials
# are enabled. FastAPI's CORS documentation advises against combining wildcard
# values with allow_credentials=True -- confirm this is intended before the
# service is exposed publicly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
# API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
# headers = {"Authorization": f"Bearer {key}"}

# def query(payload):
# 	response = requests.post(API_URL, headers=headers, json=payload)
# 	return response.json()

# from fastapi import FastAPI, HTTPException, Body
# from transformers import GPT2LMHeadModel, GPT2Tokenizer


# model = GPT2LMHeadModel.from_pretrained("EleutherAI/gpt-neo-2.7B")
# tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")


# Tokenizer and model weights are loaded from the same Hugging Face checkpoint.
# Both loads run at import time, so the module is usable only once they finish.
_MODEL_ID = "WizardLM/WizardCoder-1B-V1.0"
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(_MODEL_ID)

# Shared text-generation pipeline. The generation settings below are defaults;
# the request handlers in this file override `do_sample` per call.
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    repetition_penalty=1.2,
    top_p=0.95,
    do_sample=True,
    max_length=4000,
)
# hf_llm = HuggingFacePipeline(pipeline=pipe)


# class ChatRequest(BaseModel):
#     messages: list
#     temperature: float = 1.0
#     max_tokens: int = 50
#     stream: bool = False

# class ChatResponse(BaseModel):
#     response: str

# @app.post("/v1/chat/completions", response_model=ChatResponse)
# async def chat_completions(request: ChatRequest):
#     try:
#         # Prepare input prompt
#         input_prompt = ""
#         for message in request.messages:
#             role = message.get('role', 'user')
#             content = message.get('content', '')
#             input_prompt += f"{role}: {content}\n"

#         # Tokenize and generate response
#         input_ids = tokenizer.encode(input_prompt, return_tensors='pt')
#         output = model.generate(input_ids, max_length=1024, temperature=request.temperature, max_tokens=request.max_tokens)

#         # Decode and send response
#         response = tokenizer.decode(output[0], skip_special_tokens=True)
#         return {"response": response}

#     except Exception as e:
#         raise HTTPException(status_code=500, detail=str(e))


# Landing route: answers GET / with a fixed banner identifying the service.
@app.get("/")
def root():
    banner = {"message": "R&D LLM API"}
    return banner
# @app.get("/get")
# def get():
#     result = pipe("name 5 programming languages",do_sample=False)
#     print(result)
#     return {"message": result}


async def askLLM(prompt):
    """Run ``prompt`` through the module-level text-generation pipeline.

    The pipeline call is synchronous and can take a long time. Running it
    directly inside a coroutine would block the event loop and stall every
    other request, so the call is handed to the worker thread pool and
    awaited instead.

    Args:
        prompt: Text passed to the pipeline as its input.

    Returns:
        Whatever ``pipe`` returns for ``prompt``, unchanged.
    """
    # Local import: fastapi is already a dependency of this module, and this
    # keeps the helper self-contained.
    from fastapi.concurrency import run_in_threadpool

    # do_sample=False overrides the pipeline's sampling default for this call,
    # exactly as the original direct call did.
    return await run_in_threadpool(pipe, prompt, do_sample=False)

@app.post("/ask_llm")
async def ask_llm_endpoint(prompt: str):
    """Generate text for ``prompt`` with the local text-generation pipeline.

    Responds with ``{"result": <pipeline output>}``; the pipeline output is
    passed through unchanged.
    """
    # Local import: fastapi is already a dependency of this module, and this
    # keeps the handler self-contained.
    from fastapi.concurrency import run_in_threadpool

    # Model inference is synchronous and slow. Calling it directly from an
    # `async def` handler blocks the event loop, so no other request (not even
    # GET /) is served until generation finishes. Offloading to the worker
    # thread pool keeps the loop responsive.
    # do_sample=False overrides the pipeline's sampling default for this call.
    result = await run_in_threadpool(pipe, prompt, do_sample=False)
    return {"result": result}


# @app.post("/ask_HFAPI")
# def ask_HFAPI_endpoint(prompt: str):
#     result = query(prompt)
#     return {"result": result}
    
# from langchain.llms import OpenAI

# llm = OpenAI(model_name="text-davinci-003", temperature=0.5, openai_api_key=openai_api_key)

# @app.post("/ask_GPT")
# def ask_GPT_endpoint(prompt: str):
#     result = llm(prompt)
#     return {"result": result}