import asyncio
import hmac
import os
import secrets
import time

from fastapi import FastAPI, Response, Cookie
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, Field
value = os.environ.get('YOUR_ENV_KEY')
app = FastAPI()
Tokens = []
History = []
@app.get("/", response_class=HTMLResponse)
async def read_root(response: Response):
token = time.time()
Tokens.append(str(token))
History.append([{"role": "system", "content": "You are the Prompt Optimization Engine (POE), a tool designed to maximize the clarity and effectiveness of user queries for large language models. Your sole function is to accept a user-submitted prompt, analyze its core intent, context, and desired output format. You must then return a single, improved, streamlined version. The refined prompt must be concise, articulate the required persona and constraints clearly, and fully preserve the original user intent. Always prioritize action-oriented language and remove any conversational filler or ambiguity."}])
response.set_cookie(key="token", value=token, httponly=True, secure=True, samesite='none') # Set cookie
return '''
Chatbot Assistant
Chat Assistant
This AI model provides information based on pre-existing data and patterns, but may not always offer accurate, up-to-date, or context-specific advice. Always verify critical details from reliable sources and exercise caution when acting on suggestions.
'''
from google import genai
from google.genai import types
import requests
Api_key = os.getenv('API_KEY')
System_instruction = '''**System Prompt for a Programmer-Oriented Coding Assistant:**\n\n> You are a highly focused, fast, and expert-level coding assistant built for professional programmers.\n> Your primary role is **to assist with code writing, debugging, refactoring, optimization, and architecture**.\n> Avoid unnecessary explanations unless asked. Do not teach—**support the user like a senior pair programmer** who assumes context and skill. Prioritize clean, correct, and efficient code.\n\n> Always:\n> * Get straight to the point.\n> * Suggest the most practical and scalable solution.\n> * Respond with complete code blocks when needed.\n> * Use strong defaults and modern conventions.\n> * Assume the user knows what they're doing.\n> * Think ahead: anticipate potential pitfalls or better approaches.\n> * Give fast, minimal answers when asked for quick help.\n\n> Only elaborate if specifically requested (e.g., “explain,” “why,” “teach,” “verbose”)'''
client = genai.Client(api_key=Api_key)
class ChatRequest(BaseModel):
"""Request model for the chat endpoint."""
prompt: str
def gen(prompt):
response = client.models.generate_content(
model="gemma-3-4b-it",
contents= prompt
)
return response.text
@app.post("/response")
async def handle_chat(chat_request: ChatRequest, token: str = Cookie(None)):
a= time.time()
if token in Tokens:
i = Tokens.index(token)
History[i].append({"role": "user", "content": chat_request.prompt})
text = 'system\n'+System_instruction+'\nuser\n'
for j in History[i]:
if j['role']== 'user':
text = text + j['content'] + '\nmodel\n'
else : text = text + j['content'] + '\nuser\n'
stream = gen(text)
History[i].append({"role": "assistant", "content": stream})
b = time.time()
return {"text": stream,
"time": (b-a)/1000,
"t_per_sec": 0}
else: return 'Please stop. Just refresh the page.'
@app.post("/history")
async def history(chat_request: ChatRequest):
if chat_request.prompt == value:
time.sleep(10)
return History
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)