# Gradio app exposing a RunPod serverless LLM through a minimal LangChain wrapper.
import time
import os
from typing import Any, List, Mapping, Optional
import gradio as gr
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
import requests
class RunpodServerlessLLM(LLM):
    """LangChain LLM wrapper around a RunPod serverless endpoint.

    Submits the prompt as an asynchronous job via the RunPod v2 ``/run``
    API, then polls ``/status/{job_id}`` once per second until the job
    reaches a terminal state, and returns the generated text.
    """

    # RunPod serverless endpoint id (the ``{pod_id}`` segment of the v2 API URL).
    pod_id: str
    # RunPod API key, sent verbatim in the ``authorization`` header.
    api_key: str
    # Ids of every job submitted through this instance, in submission order.
    request_ids: List[str] = []

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to distinguish this LLM implementation."""
        return "runpod_serverless"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate a completion for ``prompt`` on the remote endpoint.

        ``stop`` sequences are accepted but not yet applied.
        (Fix: the original referenced ``self._current_job_id``, an attribute
        that is never defined anywhere in the class, so passing ``stop``
        raised AttributeError before the request was even sent.)
        """
        if stop is not None:
            # TODO: trim the response at the first stop sequence.
            pass
        return self._run_generate_request(prompt)

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"pod_id": self.pod_id}

    def _request_headers(self) -> Mapping[str, str]:
        """Headers for every RunPod API call (JSON in/out plus the API key)."""
        return {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": self.api_key,
        }

    def _request_url(self) -> str:
        """Base URL of the RunPod v2 API for this endpoint."""
        return f"https://api.runpod.ai/v2/{self.pod_id}"

    def _run_generate_request(self, prompt: str) -> str:
        """Submit a generate job and block until its output is available.

        Raises:
            RuntimeError: if the job ends in a terminal failure state.
            requests.RequestException: on network errors or timeouts.
        """
        headers = self._request_headers()
        payload = {
            "method_name": "generate",
            "input": {"model": "mistral", "prompt": prompt},
        }
        # Do NOT log ``headers`` here: it contains the API key.
        print("before request", payload, self._request_url())
        out = requests.post(
            f"{self._request_url()}/run",
            headers=headers,
            json={"input": payload},
            timeout=30,  # avoid hanging forever on a dead endpoint
        ).json()
        job_id = out["id"]
        self.request_ids.append(job_id)
        # Poll once per second until the job reaches a terminal state.
        while out["status"] != "COMPLETED":
            if out["status"] in ("FAILED", "CANCELLED", "TIMED_OUT"):
                # Originally this looped forever on a failed job.
                raise RuntimeError(
                    f"RunPod job {job_id} ended with status {out['status']}"
                )
            time.sleep(1)
            out = requests.get(
                f"{self._request_url()}/status/{job_id}",
                headers=headers,
                timeout=30,
            ).json()
        return out["output"]["response"]
# Endpoint credentials come from the environment. Fail fast with a clear
# message when they are missing, instead of passing None into the str-typed
# pydantic fields and surfacing a confusing validation error.
_pod_id = os.getenv("podid")
_api_key = os.getenv("api")
if not _pod_id or not _api_key:
    raise RuntimeError(
        "Environment variables 'podid' and 'api' must both be set"
    )
llm = RunpodServerlessLLM(
    pod_id=_pod_id,
    api_key=_api_key,
)
def llmresponse(user_prompt):
    """Gradio callback: forward the user's prompt to the LLM and return its reply."""
    return llm.predict(user_prompt)
# Build and launch the Gradio UI: one prompt textbox in, one output textbox out.
prompt_box = gr.Textbox(label="User Prompt", placeholder="Enter Your Prompt")
reply_box = gr.Textbox(label="LLM Output")
iface = gr.Interface(
    fn=llmresponse,
    inputs=prompt_box,
    outputs=reply_box,
    title="SkynetGPT",
    description="No Censorship ( This GPT cannot deny to answer) , Average Response Time - 45 seconds first try , Made By Akash Mondal , You are responsible for usage just like owning a weapon",
)
iface.launch()