Subhadip866 committed on
Commit
a19b3d1
·
verified ·
1 Parent(s): 7f520db

Upload 2 files

Browse files
Files changed (2) hide show
  1. kognieLlama.py +270 -0
  2. requirements.txt +140 -0
kognieLlama.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from typing import List, Optional, Sequence, Any, AsyncGenerator
3
+
4
+ from llama_index.legacy.llms import LLM, LLMMetadata
5
+ from llama_index.legacy.llms.types import ChatMessage
6
+ from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
7
+ from llama_index.core.base.llms.types import ChatMessage, ChatResponse, CompletionResponseAsyncGen, ChatResponseAsyncGen, MessageRole, CompletionResponse, CompletionResponseGen
8
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
9
+
10
+
11
class Kognie(LLM):
    """
    A custom LLM that proxies completions to a Kognie FastAPI server.

    Text generation uses ``GET {base_url}/text`` and image generation uses
    ``GET {base_url}/image``; both are authenticated with an ``X-KEY`` header.
    """

    # Root URL of the Kognie API server.
    base_url: str = 'http://api2.kognie.com'
    # API key sent in the X-KEY header on every request.
    api_key: str
    # Default model name used when a call does not specify one.
    model: str
    # Default response format for image generation (e.g. 'url').
    response_format: str = 'url'

    @property
    def metadata(self) -> LLMMetadata:
        """Describe this model to LlamaIndex (extend with context window etc. as needed)."""
        return LLMMetadata(
            model_name=self.model
        )

    def _generate_text(
        self,
        prompt: str,
        model: Optional[str] = None,
        **kwargs
    ) -> str:
        """
        Single-turn text generation via the /text endpoint.

        Args:
            prompt: The text prompt to complete.
            model: Optional model override; falls back to ``self.model``.

        Returns:
            The generated text, or "" when the server omits a "response" field.

        Raises:
            ValueError: If the server returns an HTTP error status.
        """
        selected_model = model if model else self.model
        endpoint = f"{self.base_url}/text"

        params = {
            "question": prompt,
            "model": selected_model
        }
        headers = {
            "X-KEY": self.api_key
        }

        try:
            # Timeout so a stuck server cannot hang the caller forever.
            response = requests.get(endpoint, params=params, headers=headers, timeout=60)
            response.raise_for_status()
        except requests.HTTPError as exc:
            raise ValueError(f"FastAPI /text endpoint error: {exc}") from exc

        data = response.json()
        return data.get("response", "")

    def _generate_image(
        self,
        prompt: str,
        model: str,
        response_format: str,
        **kwargs
    ) -> str:
        """
        Single-turn image generation via the /image endpoint.

        Args:
            prompt: Description of the image to generate.
            model: Image model name; falls back to ``self.model`` when falsy.
            response_format: How the image is returned (e.g. 'url').

        Returns:
            The image payload (URL or encoded data), or "" when absent.

        Raises:
            ValueError: If the server returns an HTTP error status.
        """
        selected_model = model if model else self.model
        endpoint = f"{self.base_url}/image"

        params = {
            "question": prompt,
            "model": selected_model,
            "response_format": response_format
        }
        headers = {
            "X-KEY": self.api_key
        }

        try:
            # Image generation can be slow; allow a longer timeout.
            response = requests.get(endpoint, params=params, headers=headers, timeout=120)
            response.raise_for_status()
        except requests.HTTPError as exc:
            # BUGFIX: the error message previously said "/text" for this endpoint.
            raise ValueError(f"FastAPI /image endpoint error: {exc}") from exc

        data = response.json()
        return data.get("response", "")

    def generate_img(
        self,
        prompt: str,
        model: str,
        response_format: str,
    ) -> ChatMessage:
        """Generate an image and wrap the result in an assistant ChatMessage."""
        img_output = self._generate_image(
            prompt=prompt,
            model=model,
            response_format=response_format
        )
        return ChatMessage(role="assistant", content=img_output)

    def chat(
        self,
        messages: List[ChatMessage],
        model: Optional[str] = None,
        **kwargs
    ) -> ChatMessage:
        """
        Multi-turn chat: flatten the conversation into a single prompt and
        delegate to single-turn text generation.
        """
        conversation_log = ""
        for m in messages:
            role = m.role  # "system", "user", or "assistant"
            content = m.content
            if role == "user":
                conversation_log += f"User: {content}\n"
            else:
                conversation_log += f"{role.capitalize()}: {content}\n"

        # Simplistic strategy: feed the whole conversation log as one prompt.
        text_output = self._generate_text(
            prompt=conversation_log,
            model=model,
            **kwargs
        )
        return ChatMessage(role="assistant", content=text_output)

    def messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
        """
        Render chat messages into the <|system|>/<|user|>/<|assistant|> format.

        BUGFIX: added the missing ``self`` parameter (the method is invoked as
        ``self.messages_to_prompt(...)``), interpolate the system message
        content (previously the literal text ``(message.content)`` was
        emitted), and dropped the misapplied ``@llm_chat_callback`` decorator
        (this is a prompt formatter, not a chat endpoint).
        """
        prompt = ""
        for message in messages:
            if message.role == MessageRole.SYSTEM:
                prompt += f"<|system|>\n{message.content}</s>\n"
            elif message.role == MessageRole.USER:
                prompt += f"<|user|>\n{message.content}</s>\n"
            elif message.role == MessageRole.ASSISTANT:
                prompt += f"<|assistant|>\n{message.content}</s>\n"
        # Ensure the prompt starts with a (possibly empty) system message.
        if not prompt.startswith("<|system|>\n"):
            prompt = "<|system|>\n</s>\n" + prompt
        # Add the final assistant cue for generation.
        prompt += "<|assistant|>\n"
        return prompt

    async def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> AsyncGenerator[ChatResponse, None]:
        """
        Stream chat responses by streaming completions over the flattened prompt.

        BUGFIX: ``astream_complete`` is a coroutine that returns an async
        generator, so it must be awaited before iterating.
        """
        async for completion_response in await self.astream_complete(
            self.messages_to_prompt(messages), **kwargs
        ):
            yield self.convert_completion_to_chat(completion_response)

    def convert_completion_to_chat(
        self, completion_response: CompletionResponse
    ) -> ChatResponse:
        """Wrap a completion's text in an assistant ChatResponse."""
        return ChatResponse(
            message=ChatMessage(role="assistant", content=completion_response.text)
        )

    @llm_chat_callback()
    async def achat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponse:
        """Async chat: delegates to the synchronous ``chat``."""
        # BUGFIX: ``chat`` returns a ChatMessage; wrap it so the declared
        # ChatResponse return type actually holds for callers.
        return ChatResponse(message=self.chat(messages, **kwargs))

    @llm_chat_callback()
    async def astream_chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponseAsyncGen:
        """Async streaming chat: forwards ``stream_chat``."""
        async def gen() -> ChatResponseAsyncGen:
            # BUGFIX: stream_chat is an async generator, so it must be
            # consumed with ``async for`` (previously a plain ``for``).
            async for message in self.stream_chat(messages, **kwargs):
                yield message

        return gen()

    @llm_completion_callback()
    async def acomplete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        """Async completion: delegates to the synchronous ``complete``."""
        return self.complete(prompt, formatted=formatted, **kwargs)

    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        """
        Single-turn completion.

        BUGFIX: previously called ``self.complete`` recursively (infinite
        recursion); now delegates to ``_generate_text``.
        """
        text = self._generate_text(prompt=prompt, **kwargs)
        return CompletionResponse(text=text)

    @llm_completion_callback()
    async def astream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        """
        Async streaming completion: wraps the synchronous stream in an async
        generator. (The class previously defined ``astream_complete`` twice;
        only this implementation is kept.)
        """
        async def gen() -> CompletionResponseAsyncGen:
            for message in self.stream_complete(prompt, formatted=formatted, **kwargs):
                yield message

        # NOTE: convert the sync generator to an async generator.
        return gen()

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        """
        Streaming completion. The backend is not streaming, so the full
        completion is yielded as a single chunk.

        BUGFIX: previously iterated over ``self.stream_complete`` (infinite
        recursion).
        """
        def gen() -> CompletionResponseGen:
            yield self.complete(prompt, formatted=formatted, **kwargs)

        return gen()

    @classmethod
    def class_name(cls) -> str:
        """Identifier used by LlamaIndex (de)serialization."""
        return "custom_llm"
249
+
250
+
251
+ # # 1) Initialize your custom LLM
252
+ # custom_llm = Kognie(
253
+ # api_key="YOUR_API_KEY",  # never commit a real API key; load it from an env var
254
+ # model="gpt-4o-mini"
255
+ # )
256
+
257
+ # answer = custom_llm.chat(messages=[ChatMessage(role="user", content="Who was the first president of the United States?")])
258
+ # print(answer)
259
+
260
+ # answer = custom_llm.generate_img(prompt='a dog', model='flux-pro-1.1', response_format='url')
261
+ # documents = SimpleDirectoryReader("./data").load_data()
262
+
263
+
264
+ # vector_index = VectorStoreIndex.from_documents(documents)
265
+ # query_engine = vector_index.as_query_engine()
266
+ # answer = query_engine.query(
267
+ # "what is the documents about?"
268
+ # )
269
+ # print(answer)
270
+
requirements.txt ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.12.9
4
+ aiosignal==1.3.2
5
+ aiosqlite==0.21.0
6
+ annotated-types==0.7.0
7
+ anthropic==0.52.2
8
+ anyio==4.9.0
9
+ attrs==25.3.0
10
+ banks==2.1.2
11
+ beautifulsoup4==4.13.4
12
+ boto3==1.38.30
13
+ botocore==1.38.30
14
+ cachetools==5.5.2
15
+ certifi==2025.4.26
16
+ charset-normalizer==3.4.2
17
+ click==8.2.1
18
+ colorama==0.4.6
19
+ dataclasses-json==0.6.7
20
+ Deprecated==1.2.18
21
+ dirtyjson==1.0.8
22
+ distro==1.9.0
23
+ eval_type_backport==0.2.2
24
+ fastapi==0.115.12
25
+ ffmpy==0.6.0
26
+ filelock==3.18.0
27
+ filetype==1.2.0
28
+ frozenlist==1.6.2
29
+ fsspec==2025.5.1
30
+ google-auth==2.40.3
31
+ google-genai==1.19.0
32
+ gradio==5.33.0
33
+ gradio_client==1.10.2
34
+ greenlet==3.2.2
35
+ griffe==1.7.3
36
+ groovy==0.1.2
37
+ h11==0.16.0
38
+ httpcore==1.0.9
39
+ httpx==0.28.1
40
+ httpx-sse==0.4.0
41
+ huggingface-hub==0.32.4
42
+ idna==3.10
43
+ Jinja2==3.1.6
44
+ jiter==0.10.0
45
+ jmespath==1.0.1
46
+ joblib==1.5.1
47
+ jsonpatch==1.33
48
+ jsonpointer==3.0.0
49
+ langchain==0.3.25
50
+ langchain-anthropic==0.3.15
51
+ langchain-community==0.3.24
52
+ langchain-core==0.3.63
53
+ langchain-openai==0.3.19
54
+ langchain-text-splitters==0.3.8
55
+ langsmith==0.3.45
56
+ llama-cloud==0.1.23
57
+ llama-cloud-services==0.6.28
58
+ llama-index==0.12.40
59
+ llama-index-agent-openai==0.4.9
60
+ llama-index-cli==0.4.3
61
+ llama-index-core==0.12.40
62
+ llama-index-embeddings-openai==0.3.1
63
+ llama-index-indices-managed-llama-cloud==0.7.4
64
+ llama-index-legacy==0.9.48.post4
65
+ llama-index-llms-anthropic==0.7.2
66
+ llama-index-llms-google-genai==0.2.1
67
+ llama-index-llms-mistralai==0.5.0
68
+ llama-index-llms-openai==0.4.3
69
+ llama-index-multi-modal-llms-openai==0.5.1
70
+ llama-index-program-openai==0.3.2
71
+ llama-index-question-gen-openai==0.3.1
72
+ llama-index-readers-file==0.4.9
73
+ llama-index-readers-llama-parse==0.4.0
74
+ llama-index-tools-bing-search==0.3.0
75
+ llama-parse==0.6.28
76
+ markdown-it-py==3.0.0
77
+ MarkupSafe==3.0.2
78
+ marshmallow==3.26.1
79
+ mcp==1.9.0
80
+ mdurl==0.1.2
81
+ mistralai==1.8.1
82
+ multidict==6.4.4
83
+ mypy_extensions==1.1.0
84
+ nest-asyncio==1.6.0
85
+ networkx==3.5
86
+ nltk==3.9.1
87
+ numpy==2.2.6
88
+ openai==1.84.0
89
+ orjson==3.10.18
90
+ packaging==24.2
91
+ pandas==2.2.3
92
+ pillow==11.2.1
93
+ platformdirs==4.3.8
94
+ propcache==0.3.1
95
+ pyasn1==0.6.1
96
+ pyasn1_modules==0.4.2
97
+ pydantic==2.11.5
98
+ pydantic-settings==2.9.1
99
+ pydantic_core==2.33.2
100
+ pydub==0.25.1
101
+ Pygments==2.19.1
102
+ pypdf==5.6.0
103
+ python-dateutil==2.9.0.post0
104
+ python-dotenv==1.1.0
105
+ python-multipart==0.0.20
106
+ pytz==2025.2
107
+ PyYAML==6.0.2
108
+ regex==2024.11.6
109
+ requests==2.32.3
110
+ requests-toolbelt==1.0.0
111
+ rich==14.0.0
112
+ rsa==4.9.1
113
+ ruff==0.11.12
114
+ s3transfer==0.13.0
115
+ safehttpx==0.1.6
116
+ semantic-version==2.10.0
117
+ shellingham==1.5.4
118
+ six==1.17.0
119
+ sniffio==1.3.1
120
+ soupsieve==2.7
121
+ SQLAlchemy==2.0.41
122
+ sse-starlette==2.3.6
123
+ starlette==0.46.2
124
+ striprtf==0.0.26
125
+ tenacity==8.5.0
126
+ tiktoken==0.9.0
127
+ tomlkit==0.13.3
128
+ tqdm==4.67.1
129
+ typer==0.16.0
130
+ typing-inspect==0.9.0
131
+ typing-inspection==0.4.1
132
+ typing_extensions==4.14.0
133
+ tzdata==2025.2
134
+ urllib3==2.4.0
135
+ uvicorn==0.34.3
136
+ websockets==15.0.1
137
+ whisper==1.1.10
138
+ wrapt==1.17.2
139
+ yarl==1.20.0
140
+ zstandard==0.23.0