File size: 2,638 Bytes
d90fe60
3d28b1b
d90fe60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545b1c5
 
d90fe60
 
 
 
 
 
 
 
 
3dfd64d
 
d90fe60
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import time
import os
from typing import Any, List, Mapping, Optional
import gradio as gr
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
import requests


class RunpodServerlessLLM(LLM):
    """LangChain LLM that runs prompts on a RunPod serverless endpoint.

    Submits the prompt as an asynchronous job via the RunPod ``/run`` API,
    then polls ``/status/<job_id>`` once per second until the job reaches a
    terminal state.

    Attributes:
        pod_id: RunPod serverless endpoint id used in the request URL.
        api_key: RunPod API key, sent verbatim in the ``authorization`` header.
        request_ids: Ids of every job submitted through this instance.
    """

    pod_id: str
    api_key: str
    # Mutable default is safe here: the pydantic base model copies field
    # defaults per instance, so this list is not shared between instances.
    request_ids: List[str] = []

    @property
    def _llm_type(self) -> str:
        """Unique LLM type identifier used by LangChain."""
        return "runpod_serverless"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Prompt text to send to the endpoint.
            stop: Optional stop sequences. The remote endpoint has no
                server-side stop support, so the response is truncated
                client-side at the first occurrence of any sequence.
            run_manager: Unused LangChain callback manager.

        Returns:
            The generated text.
        """
        # The original implementation dereferenced the undefined attribute
        # ``self._current_job_id`` when ``stop`` was passed, raising
        # AttributeError. Stop sequences are now applied client-side below.
        response = self._run_generate_request(prompt)
        if stop:
            cut = min(
                (idx for idx in (response.find(s) for s in stop) if idx != -1),
                default=-1,
            )
            if cut != -1:
                response = response[:cut]
        return response

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"pod_id": self.pod_id}

    def _request_headers(self) -> Mapping[str, str]:
        """HTTP headers attached to every RunPod API request."""
        return {
            "accept": "application/json",
            "content-type": "application/json",
            "authorization": self.api_key,
        }

    def _request_url(self) -> str:
        """Base URL of this serverless endpoint's v2 API."""
        return f"https://api.runpod.ai/v2/{self.pod_id}"

    def _run_generate_request(self, prompt: str) -> str:
        """Submit ``prompt`` as a RunPod job and block until it completes.

        Args:
            prompt: Prompt text forwarded to the remote model.

        Returns:
            The ``output.response`` field of the completed job.

        Raises:
            RuntimeError: If the job ends in a failure state
                (FAILED / CANCELLED / TIMED_OUT).
            requests.HTTPError: If an API call returns an HTTP error status.
        """
        headers = self._request_headers()
        payload = {
            "method_name": "generate",
            "input": {"model": "mistral", "prompt": prompt},
        }

        # Timeouts prevent an unresponsive endpoint from hanging forever;
        # raise_for_status surfaces auth/URL errors instead of a KeyError
        # on the missing "id" field below.
        submit = requests.post(
            f"{self._request_url()}/run",
            headers=headers,
            json={"input": payload},
            timeout=30,
        )
        submit.raise_for_status()
        out = submit.json()

        job_id = out["id"]  # renamed from ``id`` to avoid shadowing the builtin
        self.request_ids.append(job_id)

        # Poll once per second until a terminal state. The original loop
        # only tested for COMPLETED and would spin forever on failed jobs.
        while out["status"] != "COMPLETED":
            if out["status"] in ("FAILED", "CANCELLED", "TIMED_OUT"):
                raise RuntimeError(
                    f"RunPod job {job_id} ended with status {out['status']}"
                )
            time.sleep(1)
            status = requests.get(
                f"{self._request_url()}/status/{job_id}",
                headers=headers,
                timeout=30,
            )
            status.raise_for_status()
            out = status.json()

        return out["output"]["response"]

# Fail fast with a clear message when the required environment variables are
# missing; otherwise pydantic rejects the ``None`` values with a much less
# obvious validation error deep inside the model constructor.
_pod_id = os.getenv("podid")
_api_key = os.getenv("api")
if not _pod_id or not _api_key:
    raise RuntimeError(
        "Environment variables 'podid' and 'api' must be set to run this app"
    )

llm = RunpodServerlessLLM(pod_id=_pod_id, api_key=_api_key)


def llmresponse(user_prompt):
    """Gradio callback: return the LLM's completion for ``user_prompt``."""
    return llm.predict(user_prompt)


iface = gr.Interface(
    fn=llmresponse,
    inputs=gr.Textbox(label="User Prompt", placeholder="Enter Your Prompt"),
    outputs=gr.Textbox(label="LLM Output"),
    title="SkynetGPT",
    description="No Censorship ( This GPT cannot deny to answer) , Average Response Time - 45 seconds first try , Made By Akash Mondal , You are responsible for usage just like owning a weapon",
)

# Guard the entry point so importing this module does not start the server.
if __name__ == "__main__":
    iface.launch()