# link-test / endpoints.py
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
import requests
# from pydantic import BaseModel
# from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# API keys come from the environment; they are only needed by the
# commented-out Hugging Face Inference API and OpenAI endpoints below.
key = os.environ.get("huggingface_key")
openai_api_key = os.environ.get("openai_key")
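# If a gated or private checkpoint were ever used, the token could be passed
# to from_pretrained (sketch; the public model loaded below does not need it):
#   AutoTokenizer.from_pretrained("WizardLM/WizardCoder-1B-V1.0", token=key)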
app = FastAPI(
    openapi_url="/api/v1/sparrow-data/openapi.json",
    docs_url="/api/v1/sparrow-data/docs",
)

# Note: the CORS spec disallows credentialed requests against a wildcard
# origin, so browsers will reject this combination; list explicit origins
# if allow_credentials is actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
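# A stricter setup that keeps credentials working would pin the origin
# (sketch, hypothetical frontend domain):
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["https://your-frontend.example.com"],
#     allow_methods=["*"],
#     allow_headers=["*"],
#     allow_credentials=True,
# )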
# API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
# headers = {"Authorization": f"Bearer {key}"}
# def query(prompt):
#     # The Inference API expects a JSON body with an "inputs" field.
#     response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
#     return response.json()
# from fastapi import FastAPI, HTTPException, Body
# GPT-Neo is not a GPT-2 architecture, so the GPT2* classes would fail to
# load it; the Auto* classes resolve the correct ones from the config.
# model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")
# tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B")
# Load WizardCoder-1B and wrap it in a text-generation pipeline.
tokenizer = AutoTokenizer.from_pretrained("WizardLM/WizardCoder-1B-V1.0")
model = AutoModelForCausalLM.from_pretrained("WizardLM/WizardCoder-1B-V1.0")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=4000,  # upper bound on prompt + generated tokens combined
    do_sample=True,
    top_p=0.95,
    repetition_penalty=1.2,
)
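# Calling the pipeline directly returns a list of dicts keyed by
# "generated_text", e.g. (sketch):
#   out = pipe("def fibonacci(n):", do_sample=False)
#   text = out[0]["generated_text"]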
# hf_llm = HuggingFacePipeline(pipeline=pipe)
# class ChatRequest(BaseModel):
#     messages: list
#     temperature: float = 1.0
#     max_tokens: int = 50
#     stream: bool = False
# class ChatResponse(BaseModel):
#     response: str
# @app.post("/v1/chat/completions", response_model=ChatResponse)
# async def chat_completions(request: ChatRequest):
#     try:
#         # Prepare input prompt
#         input_prompt = ""
#         for message in request.messages:
#             role = message.get('role', 'user')
#             content = message.get('content', '')
#             input_prompt += f"{role}: {content}\n"
#         # Tokenize and generate; generate() takes max_new_tokens (there is
#         # no max_tokens argument), and temperature only has an effect when
#         # do_sample=True.
#         input_ids = tokenizer.encode(input_prompt, return_tensors='pt')
#         output = model.generate(input_ids, do_sample=True, temperature=request.temperature, max_new_tokens=request.max_tokens)
#         # Decode and send response
#         response = tokenizer.decode(output[0], skip_special_tokens=True)
#         return {"response": response}
#     except Exception as e:
#         raise HTTPException(status_code=500, detail=str(e))
@app.get("/")
def root():
    return {"message": "R&D LLM API"}
# @app.get("/get")
# def get():
#     result = pipe("name 5 programming languages", do_sample=False)
#     print(result)
#     return {"message": result}
async def askLLM(prompt):
    # pipe() is synchronous, so this coroutine still blocks the event loop
    # while the model generates.
    output = pipe(prompt, do_sample=False)
    return output
@app.post("/ask_llm")
async def ask_llm_endpoint(prompt: str):
    result = await askLLM(prompt)
    return {"result": result}
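# To keep the event loop responsive during generation, the blocking pipeline
# call could be pushed onto a worker thread (sketch, not wired in here):
# from fastapi.concurrency import run_in_threadpool
# @app.post("/ask_llm_threaded")
# async def ask_llm_threaded(prompt: str):
#     result = await run_in_threadpool(pipe, prompt, do_sample=False)
#     return {"result": result}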
# @app.post("/ask_HFAPI")
# def ask_HFAPI_endpoint(prompt: str):
#     result = query(prompt)
#     return {"result": result}
# from langchain.llms import OpenAI
# llm = OpenAI(model_name="text-davinci-003", temperature=0.5, openai_api_key=openai_api_key)
# @app.post("/ask_GPT")
# def ask_GPT_endpoint(prompt: str):
#     result = llm(prompt)
#     return {"result": result}
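# Local entry point (sketch): Hugging Face Spaces conventionally expose port
# 7860, but the port here is an assumption, not taken from this repo.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)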