Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import functools
|
| 3 |
+
import re
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
import tiktoken
|
| 6 |
+
import gradio as gr
|
| 7 |
+
|
| 8 |
+
# ---- OpenAI client setup ----
# Read the API key from the environment and fail fast when it is absent,
# so the app never starts half-configured.
_api_key = os.getenv("OPENAI_API_KEY")
if not _api_key:
    raise ValueError("Please set the OPENAI_API_KEY environment variable.")
client = OpenAI(api_key=_api_key)

# ---- Model choices ----
# Models come from OPENAI_MODEL_LIST (comma-separated); fall back to a
# single default when the variable is empty or contains only separators.
_raw_models = os.getenv("OPENAI_MODEL_LIST", "gpt-3.5-turbo,gpt-4")
ALL_MODELS = [name.strip() for name in _raw_models.split(",") if name.strip()]
if not ALL_MODELS:
    ALL_MODELS = ["gpt-3.5-turbo"]
|
| 19 |
+
|
| 20 |
+
# Token encoder
@functools.lru_cache(maxsize=64)
def _get_encoding(model: str):
    """Return the tiktoken encoding for *model*, cached per model name.

    Unknown model names fall back to the general-purpose ``cl100k_base``
    encoding instead of raising.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return encoding
|
| 27 |
+
|
| 28 |
+
def count_tokens(text: str, model: str) -> int:
    """Count how many tokens *text* occupies under *model*'s encoding."""
    return len(_get_encoding(model).encode(text))
|
| 31 |
+
|
| 32 |
+
# Default banned words (static style list)
# Words the style guide forbids; each is stripped from the model's reply
# with a whole-word, case-insensitive regex in build_and_respond.
DEFAULT_BANNED_WORDS = [
    "Hurdles", "Tapestry", "Bustling", "Harnessing", "Unveiling the power",
    "Realm", "Depicted", "Demistify", "Insurmountable", "New Era",
    "Poised", "Unravel", "Entanglement", "Unprecedented", "Beacon",
    "Unleash", "Delve", "Enrich", "Multifaced", "Discover", "Unlock",
    "Tailored", "Elegant", "Dive", "Ever-evolving", "Adventure",
    "Journey", "Navigate", "Navigation"
    # NOTE(review): "Demistify" and "Multifaced" look like typos for
    # "Demystify" / "Multifaceted" — as spelled they will NOT match the
    # correctly spelled words in a reply. Confirm intent before changing.
]
# Expressions/tones to catch and remove
# Regex patterns removed from the reply (case-insensitive substring match,
# no word boundaries added).
DEFAULT_FILTER_PATTERNS = [
    r"As an AI language model", r"I(?:’|'|i)m sorry", r"I apologize",
    # NOTE(review): the "i" alternative in "I(?:’|'|i)m" looks unintended —
    # presumably a bare-apostrophe/curly-apostrophe pair was meant; verify.
    r"In conclusion", r"At the end of the day"
]

# Original style prefix
# Base instruction block prepended to every prompt sent to the model.
ORIGINAL_PREFIX = (
    "You are a clear-writing assistant. Follow these rules for every response:\n"
    "1. Write in plain, everyday language.\n"
    "2. Avoid fluff, jargon, and tautological expressions.\n"
    "3. Use short, direct sentences.\n"
    "4. Do not use complicated English words—choose simple alternatives.\n"
    "5. Do not use any of the default banned words.\n"
    "Whenever you’d normally reach for a banned word, pick a simple synonym or rephrase.\n"
)

# Additional prompting points
# Optional instruction lines grouped by category; each category becomes one
# CheckboxGroup in the UI, and selected lines are appended to ORIGINAL_PREFIX.
ADDITIONAL_POINTS = {
    "Tone & Voice": [
        "Adopt a confident but approachable tone.",
        "Write as if you’re explaining to a colleague.",
        "Use active voice and vary sentence length."
    ],
    "Audience & Purpose": [
        "Assume the reader has a graduate-level background.",
        "Focus on practical takeaways for a project manager.",
        "Start with an executive summary, then technical details."
    ],
    "Structure & Signposting": [
        "Begin with a 2-sentence overview, then clear headings.",
        "Provide an outline before details.",
        "End with three bullet-point recommendations."
    ]
}
|
| 76 |
+
|
| 77 |
+
# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Custom Prompt Builder for LLMs")

    model_dropdown = gr.Dropdown(ALL_MODELS, value=ALL_MODELS[0], label="Select Model")
    user_query = gr.Textbox(label="User Query", placeholder="Type your question here...", lines=3)

    with gr.Accordion("Original Style Instructions", open=False):
        gr.Textbox(value=ORIGINAL_PREFIX, interactive=False, lines=8)

    # One CheckboxGroup per category of optional style points.
    point_widgets = []
    for category, pts in ADDITIONAL_POINTS.items():
        widget = gr.CheckboxGroup(pts, label=category)
        point_widgets.append(widget)

    # Extra regex patterns to scrub from the reply (comma-separated).
    custom_bans = gr.Textbox(
        label="Custom banned expressions (comma-separated regex)", lines=2,
        placeholder="e.g. 'As an AI language model','I apologize'"
    )

    # Generate and display the AI response
    def build_and_respond(query, model, *args):
        """Assemble the styled prompt, query the model, and scrub the reply.

        Args (wired by Gradio):
            query: the user's question text.
            model: selected model name.
            *args: one list of selected points per CheckboxGroup, followed
                by the raw custom-ban string from ``custom_bans``.

        Returns:
            The cleaned reply plus a token-usage footer.
        """
        # Last arg is the custom-ban string; the rest are the per-category
        # checkbox selections.
        *selected_lists, custom_raw = args
        selections = [item for sublist in selected_lists for item in sublist]

        # Construct full prefix: style rules + chosen points + question.
        style_block = ORIGINAL_PREFIX + "\n" + "\n".join(selections)
        full_prompt = style_block + "\n\nNow, answer the user’s question.\n\n" + query

        # Call the OpenAI API.
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": full_prompt}]
        )
        reply = resp.choices[0].message.content

        # Build complete banned list. Surrounding quotes are tolerated so the
        # placeholder example ('As an AI language model') works verbatim.
        custom_list = [b.strip().strip("'\"") for b in custom_raw.split(",") if b.strip()]
        banned_words = DEFAULT_BANNED_WORDS
        banned_patterns = DEFAULT_FILTER_PATTERNS + custom_list

        # Remove banned words (whole-word) and patterns, case-insensitively.
        for word in banned_words:
            reply = re.sub(rf"\b{re.escape(word)}\b", "", reply, flags=re.IGNORECASE)
        for pat in banned_patterns:
            # BUG FIX: a malformed user-supplied regex used to raise re.error
            # and crash the handler; skip invalid patterns instead.
            try:
                reply = re.sub(pat, "", reply, flags=re.IGNORECASE)
            except re.error:
                continue

        # BUG FIX: the old r"\s{2,}" pass flattened newlines too, destroying
        # paragraph breaks. Collapse runs of spaces/tabs only, then squeeze
        # excessive blank lines.
        reply = re.sub(r"[ \t]{2,}", " ", reply)
        reply = re.sub(r"\n{3,}", "\n\n", reply).strip()

        # Approximate token usage for prompt + reply.
        tokens_used = count_tokens(full_prompt, model) + count_tokens(reply, model)
        return reply + f"\n\n(Tokens used: {tokens_used})"

    inputs = [user_query, model_dropdown] + point_widgets + [custom_bans]
    # BUG FIX: the handler returns a plain string, but gr.Chatbot expects a
    # list of message pairs — the reply never rendered. Use a Textbox.
    output = gr.Textbox(label="AI Response", lines=12)

    # Chain the clear step with .then so the query box is only emptied after
    # the handler has read its value (two independent .submit listeners left
    # the ordering implicit).
    user_query.submit(build_and_respond, inputs=inputs, outputs=output).then(
        lambda: "", None, user_query
    )

demo.launch()
|