Subhadip866 committed on
Commit
a19b3d1
·
verified ·
1 Parent(s): 7f520db

Upload 2 files

Browse files
Files changed (2) hide show
  1. kognieLlama.py +270 -0
  2. requirements.txt +140 -0
kognieLlama.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from typing import List, Optional, Sequence, Any, AsyncGenerator
3
+
4
+ from llama_index.legacy.llms import LLM, LLMMetadata
5
+ from llama_index.legacy.llms.types import ChatMessage
6
+ from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
7
+ from llama_index.core.base.llms.types import ChatMessage, ChatResponse, CompletionResponseAsyncGen, ChatResponseAsyncGen, MessageRole, CompletionResponse, CompletionResponseGen
8
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
9
+
10
+
11
class Kognie(LLM):
    """
    A custom LLM that proxies completions to a Kognie FastAPI server.

    Text generation uses ``GET {base_url}/text`` and image generation uses
    ``GET {base_url}/image``; both are authenticated with an ``X-KEY`` header.
    """

    # Root URL of the Kognie API server.
    base_url: str = 'http://api2.kognie.com'
    # API key sent in the X-KEY header on every request.
    api_key: str
    # Default model name used when a call does not specify one.
    model: str
    # Default response format for image generation (e.g. 'url').
    response_format: str = 'url'

    @property
    def metadata(self) -> LLMMetadata:
        """Describe this model to LlamaIndex (extend with context window etc. as needed)."""
        return LLMMetadata(
            model_name=self.model
        )

    def _generate_text(
        self,
        prompt: str,
        model: Optional[str] = None,
        **kwargs
    ) -> str:
        """
        Single-turn text generation via the /text endpoint.

        Args:
            prompt: The text prompt to complete.
            model: Optional model override; falls back to ``self.model``.

        Returns:
            The generated text, or "" when the server omits a "response" field.

        Raises:
            ValueError: If the server returns an HTTP error status.
        """
        selected_model = model if model else self.model
        endpoint = f"{self.base_url}/text"

        params = {
            "question": prompt,
            "model": selected_model
        }
        headers = {
            "X-KEY": self.api_key
        }

        try:
            # Timeout so a stuck server cannot hang the caller forever.
            response = requests.get(endpoint, params=params, headers=headers, timeout=60)
            response.raise_for_status()
        except requests.HTTPError as exc:
            raise ValueError(f"FastAPI /text endpoint error: {exc}") from exc

        data = response.json()
        return data.get("response", "")

    def _generate_image(
        self,
        prompt: str,
        model: str,
        response_format: str,
        **kwargs
    ) -> str:
        """
        Single-turn image generation via the /image endpoint.

        Args:
            prompt: Description of the image to generate.
            model: Image model name; falls back to ``self.model`` when falsy.
            response_format: How the image is returned (e.g. 'url').

        Returns:
            The image payload (URL or encoded data), or "" when absent.

        Raises:
            ValueError: If the server returns an HTTP error status.
        """
        selected_model = model if model else self.model
        endpoint = f"{self.base_url}/image"

        params = {
            "question": prompt,
            "model": selected_model,
            "response_format": response_format
        }
        headers = {
            "X-KEY": self.api_key
        }

        try:
            # Image generation can be slow; allow a longer timeout.
            response = requests.get(endpoint, params=params, headers=headers, timeout=120)
            response.raise_for_status()
        except requests.HTTPError as exc:
            # BUGFIX: the error message previously said "/text" for this endpoint.
            raise ValueError(f"FastAPI /image endpoint error: {exc}") from exc

        data = response.json()
        return data.get("response", "")

    def generate_img(
        self,
        prompt: str,
        model: str,
        response_format: str,
    ) -> ChatMessage:
        """Generate an image and wrap the result in an assistant ChatMessage."""
        img_output = self._generate_image(
            prompt=prompt,
            model=model,
            response_format=response_format
        )
        return ChatMessage(role="assistant", content=img_output)

    def chat(
        self,
        messages: List[ChatMessage],
        model: Optional[str] = None,
        **kwargs
    ) -> ChatMessage:
        """
        Multi-turn chat: flatten the conversation into a single prompt and
        delegate to single-turn text generation.
        """
        conversation_log = ""
        for m in messages:
            role = m.role  # "system", "user", or "assistant"
            content = m.content
            if role == "user":
                conversation_log += f"User: {content}\n"
            else:
                conversation_log += f"{role.capitalize()}: {content}\n"

        # Simplistic strategy: feed the whole conversation log as one prompt.
        text_output = self._generate_text(
            prompt=conversation_log,
            model=model,
            **kwargs
        )
        return ChatMessage(role="assistant", content=text_output)

    def messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
        """
        Render chat messages into the <|system|>/<|user|>/<|assistant|> format.

        BUGFIX: added the missing ``self`` parameter (the method is invoked as
        ``self.messages_to_prompt(...)``), interpolate the system message
        content (previously the literal text ``(message.content)`` was
        emitted), and dropped the misapplied ``@llm_chat_callback`` decorator
        (this is a prompt formatter, not a chat endpoint).
        """
        prompt = ""
        for message in messages:
            if message.role == MessageRole.SYSTEM:
                prompt += f"<|system|>\n{message.content}</s>\n"
            elif message.role == MessageRole.USER:
                prompt += f"<|user|>\n{message.content}</s>\n"
            elif message.role == MessageRole.ASSISTANT:
                prompt += f"<|assistant|>\n{message.content}</s>\n"
        # Ensure the prompt starts with a (possibly empty) system message.
        if not prompt.startswith("<|system|>\n"):
            prompt = "<|system|>\n</s>\n" + prompt
        # Add the final assistant cue for generation.
        prompt += "<|assistant|>\n"
        return prompt

    async def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> AsyncGenerator[ChatResponse, None]:
        """
        Stream chat responses by streaming completions over the flattened prompt.

        BUGFIX: ``astream_complete`` is a coroutine that returns an async
        generator, so it must be awaited before iterating.
        """
        async for completion_response in await self.astream_complete(
            self.messages_to_prompt(messages), **kwargs
        ):
            yield self.convert_completion_to_chat(completion_response)

    def convert_completion_to_chat(
        self, completion_response: CompletionResponse
    ) -> ChatResponse:
        """Wrap a completion's text in an assistant ChatResponse."""
        return ChatResponse(
            message=ChatMessage(role="assistant", content=completion_response.text)
        )

    @llm_chat_callback()
    async def achat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponse:
        """Async chat: delegates to the synchronous ``chat``."""
        # BUGFIX: ``chat`` returns a ChatMessage; wrap it so the declared
        # ChatResponse return type actually holds for callers.
        return ChatResponse(message=self.chat(messages, **kwargs))

    @llm_chat_callback()
    async def astream_chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponseAsyncGen:
        """Async streaming chat: forwards ``stream_chat``."""
        async def gen() -> ChatResponseAsyncGen:
            # BUGFIX: stream_chat is an async generator, so it must be
            # consumed with ``async for`` (previously a plain ``for``).
            async for message in self.stream_chat(messages, **kwargs):
                yield message

        return gen()

    @llm_completion_callback()
    async def acomplete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        """Async completion: delegates to the synchronous ``complete``."""
        return self.complete(prompt, formatted=formatted, **kwargs)

    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        """
        Single-turn completion.

        BUGFIX: previously called ``self.complete`` recursively (infinite
        recursion); now delegates to ``_generate_text``.
        """
        text = self._generate_text(prompt=prompt, **kwargs)
        return CompletionResponse(text=text)

    @llm_completion_callback()
    async def astream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        """
        Async streaming completion: wraps the synchronous stream in an async
        generator. (The class previously defined ``astream_complete`` twice;
        only this implementation is kept.)
        """
        async def gen() -> CompletionResponseAsyncGen:
            for message in self.stream_complete(prompt, formatted=formatted, **kwargs):
                yield message

        # NOTE: convert the sync generator to an async generator.
        return gen()

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        """
        Streaming completion. The backend is not streaming, so the full
        completion is yielded as a single chunk.

        BUGFIX: previously iterated over ``self.stream_complete`` (infinite
        recursion).
        """
        def gen() -> CompletionResponseGen:
            yield self.complete(prompt, formatted=formatted, **kwargs)

        return gen()

    @classmethod
    def class_name(cls) -> str:
        """Identifier used by LlamaIndex (de)serialization."""
        return "custom_llm"
249
+
250
+
251
+ # # 1) Initialize your custom LLM
252
+ # custom_llm = Kognie(
253
+ # api_key="YOUR_API_KEY",  # never commit a real API key; load it from an env var
254
+ # model="gpt-4o-mini"
255
+ # )
256
+
257
+ # answer = custom_llm.chat(messages=[ChatMessage(role="user", content="Who was the first president of the United States?")])
258
+ # print(answer)
259
+
260
+ # answer = custom_llm.generate_img(prompt='a dog', model='flux-pro-1.1', response_format='url')
261
+ # documents = SimpleDirectoryReader("./data").load_data()
262
+
263
+
264
+ # vector_index = VectorStoreIndex.from_documents(documents)
265
+ # query_engine = vector_index.as_query_engine()
266
+ # answer = query_engine.query(
267
+ # "what is the documents about?"
268
+ # )
269
+ # print(answer)
270
+
requirements.txt ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.12.9
4
+ aiosignal==1.3.2
5
+ aiosqlite==0.21.0
6
+ annotated-types==0.7.0
7
+ anthropic==0.52.2
8
+ anyio==4.9.0
9
+ attrs==25.3.0
10
+ banks==2.1.2
11
+ beautifulsoup4==4.13.4
12
+ boto3==1.38.30
13
+ botocore==1.38.30
14
+ cachetools==5.5.2
15
+ certifi==2025.4.26
16
+ charset-normalizer==3.4.2
17
+ click==8.2.1
18
+ colorama==0.4.6
19
+ dataclasses-json==0.6.7
20
+ Deprecated==1.2.18
21
+ dirtyjson==1.0.8
22
+ distro==1.9.0
23
+ eval_type_backport==0.2.2
24
+ fastapi==0.115.12
25
+ ffmpy==0.6.0
26
+ filelock==3.18.0
27
+ filetype==1.2.0
28
+ frozenlist==1.6.2
29
+ fsspec==2025.5.1
30
+ google-auth==2.40.3
31
+ google-genai==1.19.0
32
+ gradio==5.33.0
33
+ gradio_client==1.10.2
34
+ greenlet==3.2.2
35
+ griffe==1.7.3
36
+ groovy==0.1.2
37
+ h11==0.16.0
38
+ httpcore==1.0.9
39
+ httpx==0.28.1
40
+ httpx-sse==0.4.0
41
+ huggingface-hub==0.32.4
42
+ idna==3.10
43
+ Jinja2==3.1.6
44
+ jiter==0.10.0
45
+ jmespath==1.0.1
46
+ joblib==1.5.1
47
+ jsonpatch==1.33
48
+ jsonpointer==3.0.0
49
+ langchain==0.3.25
50
+ langchain-anthropic==0.3.15
51
+ langchain-community==0.3.24
52
+ langchain-core==0.3.63
53
+ langchain-openai==0.3.19
54
+ langchain-text-splitters==0.3.8
55
+ langsmith==0.3.45
56
+ llama-cloud==0.1.23
57
+ llama-cloud-services==0.6.28
58
+ llama-index==0.12.40
59
+ llama-index-agent-openai==0.4.9
60
+ llama-index-cli==0.4.3
61
+ llama-index-core==0.12.40
62
+ llama-index-embeddings-openai==0.3.1
63
+ llama-index-indices-managed-llama-cloud==0.7.4
64
+ llama-index-legacy==0.9.48.post4
65
+ llama-index-llms-anthropic==0.7.2
66
+ llama-index-llms-google-genai==0.2.1
67
+ llama-index-llms-mistralai==0.5.0
68
+ llama-index-llms-openai==0.4.3
69
+ llama-index-multi-modal-llms-openai==0.5.1
70
+ llama-index-program-openai==0.3.2
71
+ llama-index-question-gen-openai==0.3.1
72
+ llama-index-readers-file==0.4.9
73
+ llama-index-readers-llama-parse==0.4.0
74
+ llama-index-tools-bing-search==0.3.0
75
+ llama-parse==0.6.28
76
+ markdown-it-py==3.0.0
77
+ MarkupSafe==3.0.2
78
+ marshmallow==3.26.1
79
+ mcp==1.9.0
80
+ mdurl==0.1.2
81
+ mistralai==1.8.1
82
+ multidict==6.4.4
83
+ mypy_extensions==1.1.0
84
+ nest-asyncio==1.6.0
85
+ networkx==3.5
86
+ nltk==3.9.1
87
+ numpy==2.2.6
88
+ openai==1.84.0
89
+ orjson==3.10.18
90
+ packaging==24.2
91
+ pandas==2.2.3
92
+ pillow==11.2.1
93
+ platformdirs==4.3.8
94
+ propcache==0.3.1
95
+ pyasn1==0.6.1
96
+ pyasn1_modules==0.4.2
97
+ pydantic==2.11.5
98
+ pydantic-settings==2.9.1
99
+ pydantic_core==2.33.2
100
+ pydub==0.25.1
101
+ Pygments==2.19.1
102
+ pypdf==5.6.0
103
+ python-dateutil==2.9.0.post0
104
+ python-dotenv==1.1.0
105
+ python-multipart==0.0.20
106
+ pytz==2025.2
107
+ PyYAML==6.0.2
108
+ regex==2024.11.6
109
+ requests==2.32.3
110
+ requests-toolbelt==1.0.0
111
+ rich==14.0.0
112
+ rsa==4.9.1
113
+ ruff==0.11.12
114
+ s3transfer==0.13.0
115
+ safehttpx==0.1.6
116
+ semantic-version==2.10.0
117
+ shellingham==1.5.4
118
+ six==1.17.0
119
+ sniffio==1.3.1
120
+ soupsieve==2.7
121
+ SQLAlchemy==2.0.41
122
+ sse-starlette==2.3.6
123
+ starlette==0.46.2
124
+ striprtf==0.0.26
125
+ tenacity==8.5.0
126
+ tiktoken==0.9.0
127
+ tomlkit==0.13.3
128
+ tqdm==4.67.1
129
+ typer==0.16.0
130
+ typing-inspect==0.9.0
131
+ typing-inspection==0.4.1
132
+ typing_extensions==4.14.0
133
+ tzdata==2025.2
134
+ urllib3==2.4.0
135
+ uvicorn==0.34.3
136
+ websockets==15.0.1
137
+ whisper==1.1.10
138
+ wrapt==1.17.2
139
+ yarl==1.20.0
140
+ zstandard==0.23.0