"""
This is the prompt engineering layer to modifty the prompt for better perfromance
"""
import os
import time

import openai
import streamlit as st
from google import genai
from google.genai import types
from openai import OpenAI

from Messaging_system.LLM import LLM


class PromptEngine:
    def __init__(self, coreconfig):
        self.Core = coreconfig     # shared core configuration / state object
        self.llm = LLM(self.Core)  # LLM wrapper; its client is reused for Claude calls
# ============================================================
    def get_credential(self, key):
        """Look up a credential in the environment first, then in Streamlit secrets."""
        return os.getenv(key) or st.secrets.get(key)
# =============================================================
    def prompt_engineering(self, prompt):
        """
        Prompt engineering layer: rewrite the prompt so it is more likely to
        produce a high-quality response.
        :param prompt: the original prompt text
        :return: the rewritten prompt
        """
        new_prompt = f"""
        Rewrite the prompt below following prompt engineering best practices. Return only the new prompt as text.
        Improve the prompt and instructions inside the <original_prompt> tag to maximize the quality of the results.

        ### Original prompt
        <original_prompt>
        {prompt}
        </original_prompt>

        Output the new prompt as text without any additional information.
        """
final_prompt = self.get_final_prompt(new_prompt)
return final_prompt
# ===========================================================
    def get_final_prompt(self, prompt):
        if self.Core.model in self.Core.config_file["openai_models"]:
            return self.get_openai_response(prompt)
        elif self.Core.model in self.Core.config_file["inference_models"]:
            return self.get_inference_response(prompt)
        elif self.Core.model in self.Core.config_file["claude_models"]:
            return self.get_claude_response(prompt, self.llm_instructions())
        elif self.Core.model in self.Core.config_file["google_models"]:
            return self.get_gemini_response(prompt)
        # Unknown model: fall back to the original prompt instead of returning None.
        return prompt
# ============================================================
    def llm_instructions(self):
        system_prompt = """
        You are a prompt engineer. Rewrite the following prompt to be clearer, more specific,
        and more likely to elicit a better response from an LLM, applying established prompt
        engineering techniques.
        """
        return system_prompt
# =============================================================
    def get_inference_response(self, prompt, max_retries=4):
        """
        Send the prompt to a model hosted on inference.net (OpenAI-compatible API)
        and return the response.
        """
        api_key = self.get_credential("inference_api_key")
        client = OpenAI(
            base_url="https://api.inference.net/v1",
            api_key=api_key,
        )
        reasoning = self.Core.reasoning_model
        system_prompt = self.llm_instructions()
for attempt in range(max_retries):
try:
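                # Reasoning models take a reasoning_effort hint instead of a
                # temperature setting, hence the two request variants below.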
if reasoning:
response = client.chat.completions.create(
model=self.Core.model,
response_format={"type": "text"},
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
reasoning_effort="medium",
n=1,
)
else:
response = client.chat.completions.create(
model=self.Core.model,
response_format={"type": "text"},
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
n=1,
temperature=self.Core.temperature
)
tokens = {
'prompt_tokens': response.usage.prompt_tokens,
'completion_tokens': response.usage.completion_tokens,
'total_tokens': response.usage.total_tokens
}
content = response.choices[0].message.content
output = str(content)
                # Accumulate token usage on the shared config object.
self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
self.Core.temp_token_counter += tokens['total_tokens']
return output
            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # the underlying exception, likely raised within httpx
            except openai.RateLimitError:
                print("A 429 status code was received; backing off before retrying.")
                time.sleep(2 ** attempt)  # simple exponential backoff
            except openai.APIStatusError as e:
                print("A non-200-range status code was received")
                print(e.status_code)
                print(e.response)
        print("Max retries exceeded. Returning the original prompt.")
        return prompt  # fall back to the original prompt
# ===============================================================
    def get_openai_response(self, prompt, max_retries=4):
        """
        Send the prompt to an OpenAI model and return the response.
        """
        client = OpenAI(api_key=self.Core.api_key)  # the client takes the key directly; no module-level key needed
        reasoning = self.Core.reasoning_model
        system_prompt = self.llm_instructions()
for attempt in range(max_retries):
try:
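                # Reasoning models take a reasoning_effort hint instead of a
                # temperature setting, hence the two request variants below.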
if reasoning:
response = client.chat.completions.create(
model=self.Core.model,
response_format={"type": "text"},
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
reasoning_effort="medium",
n=1,
)
else:
response = client.chat.completions.create(
model=self.Core.model,
response_format={"type": "text"},
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
n=1,
temperature=self.Core.temperature
)
tokens = {
'prompt_tokens': response.usage.prompt_tokens,
'completion_tokens': response.usage.completion_tokens,
'total_tokens': response.usage.total_tokens
}
content = response.choices[0].message.content
output = str(content)
                # Accumulate token usage on the shared config object.
self.Core.total_tokens['prompt_tokens'] += tokens['prompt_tokens']
self.Core.total_tokens['completion_tokens'] += tokens['completion_tokens']
self.Core.temp_token_counter += tokens['total_tokens']
return output
            except openai.APIConnectionError as e:
                print("The server could not be reached")
                print(e.__cause__)  # the underlying exception, likely raised within httpx
            except openai.RateLimitError:
                print("A 429 status code was received; backing off before retrying.")
                time.sleep(2 ** attempt)  # simple exponential backoff
            except openai.APIStatusError as e:
                print("A non-200-range status code was received")
                print(e.status_code)
                print(e.response)
        print("Max retries exceeded. Returning the original prompt.")
        return prompt  # fall back to the original prompt
# ==========================================================================
    def get_gemini_response(self, prompt, max_retries=4):
        """
        Send the prompt to a Google Gemini model and return the response.
        :param prompt: the prompt text
        :param max_retries: number of attempts before giving up
        :return: the model's response text, or the original prompt on failure
        """
        client = genai.Client(api_key=self.get_credential("Google_API"))
        for attempt in range(max_retries):
            try:
                response = client.models.generate_content(
                    model=self.Core.model,
                    contents=prompt,
                    config=types.GenerateContentConfig(
                        thinking_config=types.ThinkingConfig(thinking_budget=0),  # disable thinking for latency
                        system_instruction=self.llm_instructions(),
                        temperature=self.Core.temperature,
                        response_mime_type="text/plain",
                    ))
                output = str(response.text)
                return output
            except Exception as e:
                print(f"Error in attempt {attempt}: {e}")
        print("Max retries exceeded. Returning the original prompt.")
        return prompt  # fall back to the original prompt
# ==========================================================================
    def get_claude_response(self, prompt, instructions, max_retries=4):
        """
        Send the prompt to a Claude model and return the response.
        :param prompt: the prompt text
        :param instructions: the system prompt for the model
        :return: the model's response text, or the original prompt on failure
        """
        for attempt in range(max_retries):
            try:
                # Try generating the response via the shared Anthropic client.
                message = self.llm.client.messages.create(
                    model=self.Core.model,
                    max_tokens=4096,
                    system=instructions,
                    messages=[
                        {"role": "user", "content": prompt}
                    ],
                    temperature=self.Core.temperature
                )
                response = message.content[0].text
                return response
            except Exception as e:
                print(f"Error in attempt {attempt}: {e}")
        print("Max retries exceeded. Returning the original prompt.")
        return prompt  # fall back to the original prompt
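

# ----------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the module's API): it
# shows how PromptEngine might be driven with a hand-rolled config object.
# The attribute names below (model, config_file, temperature, ...) are the
# ones read by the methods above; the DemoConfig class, the model string,
# and all values are assumptions for demonstration. A valid API key (and a
# config that LLM(...) can initialize with) is required for a real call.
# ----------------------------------------------------------------------
if __name__ == "__main__":
    class DemoConfig:  # hypothetical stand-in for the real core config object
        model = "gemini-2.5-flash-lite"
        config_file = {
            "openai_models": [],
            "inference_models": [],
            "claude_models": [],
            "google_models": ["gemini-2.5-flash-lite"],
        }
        api_key = None                  # only needed for OpenAI models
        temperature = 0.3
        reasoning_model = False
        total_tokens = {"prompt_tokens": 0, "completion_tokens": 0}
        temp_token_counter = 0

    engine = PromptEngine(DemoConfig())
    improved = engine.prompt_engineering("Summarize this article in 3 bullets.")
    print(improved)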