File size: 3,510 Bytes
471dd95 72e7e3c 4aef1af 6ea2900 3d01049 c700750 b9a2219 6ea2900 4aef1af e79a89e 471dd95 a8f04c2 4aef1af a8f04c2 4aef1af be588c3 4aef1af a8f04c2 c4bc4d2 72e7e3c a8f04c2 be588c3 3d01049 be588c3 3d01049 6ea2900 3e22d29 be588c3 3e22d29 be588c3 3e22d29 be588c3 a8f04c2 be588c3 a8f04c2 be588c3 a8f04c2 be588c3 a8f04c2 471dd95 c4bc4d2 28ff6a7 a8f04c2 28ff6a7 be588c3 28ff6a7 be588c3 9a8de5e a8f04c2 ab77d93 a8f04c2 ab77d93 a8f04c2 ab77d93 a8f04c2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | from fastapi import FastAPI
import asyncio
import functools
import os
from concurrent.futures import ThreadPoolExecutor

import requests
import torch
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# from pydantic import BaseModel
# from langchain.llms.huggingface_pipeline import HuggingFacePipeline
# API credentials come from the environment; each value is None when the
# corresponding variable is not set.
key = os.getenv("huggingface_key")
openai_api_key = os.getenv("openai_key")

# The OpenAPI schema and the interactive docs are served under the
# /api/v1/sparrow-data prefix instead of FastAPI's default locations.
app = FastAPI(
    openapi_url="/api/v1/sparrow-data/openapi.json",
    docs_url="/api/v1/sparrow-data/docs",
)

# NOTE(review): every origin, method and header is allowed while credentials
# are enabled as well -- confirm this permissive CORS policy is intended
# before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
# headers = {"Authorization": f"Bearer {key}"}
# def query(payload):
# response = requests.post(API_URL, headers=headers, json=payload)
# return response.json()
# from fastapi import FastAPI, HTTPException, Body
# from transformers import GPT2LMHeadModel, GPT2Tokenizer
# model = GPT2LMHeadModel.from_pretrained("EleutherAI/gpt-neo-2.7B")
# tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
# Checkpoint shared by the tokenizer and the causal language model.
_MODEL_NAME = "WizardLM/WizardCoder-1B-V1.0"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(_MODEL_NAME)

# Module-level text-generation pipeline used by the request handlers.
# The keyword arguments below are the pipeline's default generation
# settings; the handlers in this file additionally pass do_sample=False
# on each call.
pipe = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=4000,
    do_sample=True,
    top_p=0.95,
    repetition_penalty=1.2,
)
# hf_llm = HuggingFacePipeline(pipeline=pipe)
# class ChatRequest(BaseModel):
# messages: list
# temperature: float = 1.0
# max_tokens: int = 50
# stream: bool = False
# class ChatResponse(BaseModel):
# response: str
# @app.post("/v1/chat/completions", response_model=ChatResponse)
# async def chat_completions(request: ChatRequest):
# try:
# # Prepare input prompt
# input_prompt = ""
# for message in request.messages:
# role = message.get('role', 'user')
# content = message.get('content', '')
# input_prompt += f"{role}: {content}\n"
# # Tokenize and generate response
# input_ids = tokenizer.encode(input_prompt, return_tensors='pt')
# output = model.generate(input_ids, max_length=1024, temperature=request.temperature, max_tokens=request.max_tokens)
# # Decode and send response
# response = tokenizer.decode(output[0], skip_special_tokens=True)
# return {"response": response}
# except Exception as e:
# raise HTTPException(status_code=500, detail=str(e))
@app.get("/")
def root():
    """Return a static banner identifying this service."""
    banner = "R&D LLM API"
    return {"message": banner}
# @app.get("/get")
# def get():
# result = pipe("name 5 programming languages",do_sample=False)
# print(result)
# return {"message": result}
# Inference is blocking and can take a long time, so it must not run on the
# event loop. A single worker is used on purpose: generations still execute
# one at a time, exactly as they did when the pipeline was called inline.
_INFERENCE_EXECUTOR = ThreadPoolExecutor(
    max_workers=1, thread_name_prefix="llm-inference"
)


async def askLLM(prompt):
    """Generate text for ``prompt`` without blocking the event loop.

    The module-level ``pipe`` is invoked with ``do_sample=False`` on the
    dedicated inference thread while this coroutine awaits the result, so
    other requests can still be accepted and answered in the meantime.

    Args:
        prompt: Text handed to the text-generation pipeline.

    Returns:
        Whatever ``pipe`` returns for the prompt, unchanged.

    Raises:
        Any exception raised by ``pipe`` is re-raised to the awaiting caller.
    """
    loop = asyncio.get_running_loop()
    generate = functools.partial(pipe, prompt, do_sample=False)
    return await loop.run_in_executor(_INFERENCE_EXECUTOR, generate)


@app.post("/ask_llm")
async def ask_llm_endpoint(prompt: str):
    """Handle ``POST /ask_llm`` and return the model output for ``prompt``.

    ``prompt`` is declared as a plain ``str`` parameter, which FastAPI reads
    from the query string rather than from the request body.

    Args:
        prompt: Text to send to the language model.

    Returns:
        A dict with the single key ``"result"`` holding the pipeline output.
    """
    result = await askLLM(prompt)
    return {"result": result}
# @app.post("/ask_HFAPI")
# def ask_HFAPI_endpoint(prompt: str):
# result = query(prompt)
# return {"result": result}
# from langchain.llms import OpenAI
# llm = OpenAI(model_name="text-davinci-003", temperature=0.5, openai_api_key=openai_api_key)
# @app.post("/ask_GPT")
# def ask_GPT_endpoint(prompt: str):
# result = llm(prompt)
# return {"result": result} |