""" This is the prompt engineering layer to modifty the prompt for better perfromance """ import openai from fontTools.ttLib.tables.ttProgram import instructions from openai import OpenAI from Messaging_system.LLM import LLM import os import streamlit as st from google.genai import types from google import genai class PromptEngine: def __init__(self, coreconfig): self.Core=coreconfig self.llm=LLM(self.Core) # ============================================================ def get_credential(self, key): return os.getenv(key) or st.secrets.get(key) # ============================================================= def prompt_engineering(self, prompt): """ prompt engineering layer to modify the prompt as needed :param prompt: :return: """ new_prompt = f""" Modify below prompt following best prompt engineering methods. return only the new prompt as a text. modify the prompt and instructions in tag to maximimize better results by providing the new prompt. ### Original prompt {prompt} output the new prompt as text without any additional information. """ final_prompt = self.get_final_prompt(new_prompt) return final_prompt # =========================================================== def get_final_prompt(self, prompt): if self.Core.model in self.Core.config_file["openai_models"]: final_prompt = self.get_openai_response(prompt) return final_prompt elif self.Core.model in self.Core.config_file["inference_models"]: final_prompt = self.get_inference_response(prompt) return final_prompt elif self.Core.model in self.Core.config_file["claude_models"]: final_prompt = self.get_claude_response(prompt, self.llm_instructions()) return final_prompt elif self.Core.model in self.Core.config_file["google_models"]: final_prompt = self.get_gemini_response(prompt) return final_prompt # ============================================================ def llm_instructions(self): system_prompt = """ You are a prompt engineer. Rewrite the following prompt to be clearer, more specific, and likely to produce a better response from an LLM following best prompt engineering techniques and styles. 
""" return system_prompt # ============================================================= def get_inference_response(self, prompt, max_retries=4): api_key = self.get_credential("inference_api_key") client = OpenAI( base_url="https://api.inference.net/v1", api_key=api_key, ) reasoning = self.Core.reasoning_model system_prompt = self.llm_instructions() for attempt in range(max_retries): try: if reasoning: response = client.chat.completions.create( model=self.Core.model, response_format={"type": "text"}, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt} ], reasoning_effort="medium", n=1, ) else: response = client.chat.completions.create( model=self.Core.model, response_format={"type": "text"}, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt} ], n=1, temperature=self.Core.temperature ) tokens = { 'prompt_tokens': response.usage.prompt_tokens, 'completion_tokens': response.usage.completion_tokens, 'total_tokens': response.usage.total_tokens } content = response.choices[0].message.content output = str(content) # validating the JSON self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens'] self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens'] self.Core.temp_token_counter += tokens['total_tokens'] return output except openai.APIConnectionError as e: print("The server could not be reached") print(e.__cause__) # an underlying Exception, likely raised within httpx. except openai.RateLimitError as e: print("A 429 status code was received; we should back off a bit.") except openai.APIStatusError as e: print("Another non-200-range status code was received") print(e.status_code) print(e.response) print("Max retries exceeded. Returning empty response.") return prompt # returns original prompt if needed # =============================================================== def get_openai_response(self, prompt, max_retries=4): """ sending the prompt to openai LLM and get back the response """ openai.api_key = self.Core.api_key client = OpenAI(api_key=self.Core.api_key) reasoning = self.Core.reasoning_model system_prompt = self.llm_instructions() for attempt in range(max_retries): try: if reasoning: response = client.chat.completions.create( model=self.Core.model, response_format={"type": "text"}, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt} ], reasoning_effort="medium", n=1, ) else: response = client.chat.completions.create( model=self.Core.model, response_format={"type": "text"}, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt} ], n=1, temperature=self.Core.temperature ) tokens = { 'prompt_tokens': response.usage.prompt_tokens, 'completion_tokens': response.usage.completion_tokens, 'total_tokens': response.usage.total_tokens } content = response.choices[0].message.content output = str(content) # validating the JSON self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens'] self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens'] self.Core.temp_token_counter += tokens['total_tokens'] return output except openai.APIConnectionError as e: print("The server could not be reached") print(e.__cause__) # an underlying Exception, likely raised within httpx. 
            except openai.RateLimitError:
                print("A 429 status code was received; backing off before retrying.")
                time.sleep(2 ** attempt)
            except openai.APIStatusError as e:
                print("Another non-200-range status code was received")
                print(e.status_code)
                print(e.response)
        print("Max retries exceeded. Returning the original prompt.")
        return prompt

    # ==========================================================================
    def get_gemini_response(self, prompt, max_retries=4):
        """
        Send the prompt to a Google Gemini LLM and return the response.
        :param prompt: the prompt to rewrite
        :param max_retries: number of attempts before giving up
        :return: the rewritten prompt, or the original prompt on failure
        """
        client = genai.Client(api_key=self.get_credential("Google_API"))
        for attempt in range(max_retries):
            try:
                response = client.models.generate_content(
                    model=self.Core.model,
                    contents=prompt,
                    config=types.GenerateContentConfig(
                        thinking_config=types.ThinkingConfig(thinking_budget=0),
                        system_instruction=self.llm_instructions(),
                        temperature=self.Core.temperature,
                        response_mime_type="text/plain"
                    ))
                output = str(response.text)
                return output
            except Exception as e:
                print(f"Error in attempt {attempt}: {e}")
        print("Max retries exceeded. Returning the original prompt.")
        return prompt

    # ==========================================================================
    def get_claude_response(self, prompt, instructions, max_retries=4):
        """
        Send the prompt to a Claude LLM and return the response.
        :param prompt: the prompt to rewrite
        :param instructions: the system prompt for the rewrite
        :param max_retries: number of attempts before giving up
        :return: the rewritten prompt, or the original prompt on failure
        """
        for attempt in range(max_retries):
            try:
                message = self.llm.client.messages.create(
                    model=self.Core.model,
                    max_tokens=4096,
                    system=instructions,
                    messages=[
                        {"role": "user", "content": prompt}
                    ],
                    temperature=self.Core.temperature
                )
                response = message.content[0].text
                return response
            except Exception as e:
                print(f"Error: {e}")
        print("Max retries exceeded. Returning the original prompt.")
        return prompt
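
# ==========================================================================
# Minimal usage sketch (assumption): the real core config is built elsewhere in
# the application; _DemoConfig below is a hypothetical stand-in exposing only
# the attributes PromptEngine reads (model, config_file, temperature,
# reasoning_model, api_key, total_tokens, temp_token_counter). It also assumes
# Messaging_system.LLM.LLM can be constructed from the same config object.
if __name__ == "__main__":
    class _DemoConfig:
        model = "gpt-4o-mini"  # placeholder model name, not from the real config
        config_file = {
            "openai_models": ["gpt-4o-mini"],
            "inference_models": [],
            "claude_models": [],
            "google_models": [],
        }
        temperature = 0.2
        reasoning_model = False
        api_key = os.getenv("OPENAI_API_KEY")
        total_tokens = {"prompt_tokens": 0, "completion_tokens": 0}
        temp_token_counter = 0

    engine = PromptEngine(_DemoConfig())
    print(engine.prompt_engineering("Summarize the attached article in two sentences."))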