resumesearch committed on
Commit
65a33c1
·
verified ·
1 Parent(s): 95267c4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import functools
import re
from openai import OpenAI
import tiktoken
import gradio as gr

# --- OpenAI client setup -------------------------------------------------
# The API key must come from the environment; fail fast with a clear error
# instead of letting the first API call die with an auth failure.
_api_key = os.getenv("OPENAI_API_KEY")
if not _api_key:
    raise ValueError("Please set the OPENAI_API_KEY environment variable.")
client = OpenAI(api_key=_api_key)

# --- Available models ----------------------------------------------------
# OPENAI_MODEL_LIST is a comma-separated override; strip whitespace, drop
# empty entries, and guarantee at least one model even if the variable is
# set to something degenerate like ",,,".
ALL_MODELS = []
for raw_name in os.getenv("OPENAI_MODEL_LIST", "gpt-3.5-turbo,gpt-4").split(","):
    name = raw_name.strip()
    if name:
        ALL_MODELS.append(name)
if not ALL_MODELS:
    ALL_MODELS = ["gpt-3.5-turbo"]
# Token encoder
@functools.lru_cache(maxsize=64)
def _get_encoding(model: str):
    """Return the tiktoken encoding for *model*, cached per model name.

    Model names tiktoken does not recognize fall back to the
    ``cl100k_base`` encoding instead of raising.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken raises KeyError for unknown model names.
        encoding = tiktoken.get_encoding("cl100k_base")
    return encoding
27
+
def count_tokens(text: str, model: str) -> int:
    """Return how many tokens *text* occupies under *model*'s encoding."""
    return len(_get_encoding(model).encode(text))
31
+
# Default banned words (static style list).
# Matching is whole-word and case-insensitive (see build_and_respond).
# BUG FIX: "Demistify" and "Multifaced" were misspelled, so they could
# never match the correctly-spelled words models actually produce.
DEFAULT_BANNED_WORDS = [
    "Hurdles", "Tapestry", "Bustling", "Harnessing", "Unveiling the power",
    "Realm", "Depicted", "Demystify", "Insurmountable", "New Era",
    "Poised", "Unravel", "Entanglement", "Unprecedented", "Beacon",
    "Unleash", "Delve", "Enrich", "Multifaceted", "Discover", "Unlock",
    "Tailored", "Elegant", "Dive", "Ever-evolving", "Adventure",
    "Journey", "Navigate", "Navigation"
]
# Expressions/tones to catch and remove from model replies.
# BUG FIX: the sorry-pattern's original "|i" branch matched only the
# nonsense string "Iim sorry"; "(?:’|')?" matches the curly apostrophe,
# the straight apostrophe, and the bare "Im sorry".
DEFAULT_FILTER_PATTERNS = [
    r"As an AI language model", r"I(?:’|')?m sorry", r"I apologize",
    r"In conclusion", r"At the end of the day"
]

# Original style prefix prepended to every prompt sent to the model.
ORIGINAL_PREFIX = (
    "You are a clear-writing assistant. Follow these rules for every response:\n"
    "1. Write in plain, everyday language.\n"
    "2. Avoid fluff, jargon, and tautological expressions.\n"
    "3. Use short, direct sentences.\n"
    "4. Do not use complicated English words—choose simple alternatives.\n"
    "5. Do not use any of the default banned words.\n"
    "Whenever you’d normally reach for a banned word, pick a simple synonym or rephrase.\n"
)

# Additional prompting points, grouped by category. Each category becomes
# one CheckboxGroup in the UI and the selected items are appended to the
# style prefix.
ADDITIONAL_POINTS = {
    "Tone & Voice": [
        "Adopt a confident but approachable tone.",
        "Write as if you’re explaining to a colleague.",
        "Use active voice and vary sentence length."
    ],
    "Audience & Purpose": [
        "Assume the reader has a graduate-level background.",
        "Focus on practical takeaways for a project manager.",
        "Start with an executive summary, then technical details."
    ],
    "Structure & Signposting": [
        "Begin with a 2-sentence overview, then clear headings.",
        "Provide an outline before details.",
        "End with three bullet-point recommendations."
    ]
}
76
+
# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Custom Prompt Builder for LLMs")

    model_dropdown = gr.Dropdown(ALL_MODELS, value=ALL_MODELS[0], label="Select Model")
    user_query = gr.Textbox(label="User Query", placeholder="Type your question here...", lines=3)

    with gr.Accordion("Original Style Instructions", open=False):
        gr.Textbox(value=ORIGINAL_PREFIX, interactive=False, lines=8)

    # One CheckboxGroup per category of additional prompting points.
    point_widgets = []
    for category, pts in ADDITIONAL_POINTS.items():
        point_widgets.append(gr.CheckboxGroup(pts, label=category))

    # Extra banned expressions, entered as comma-separated regex patterns.
    custom_bans = gr.Textbox(
        label="Custom banned expressions (comma-separated regex)", lines=2,
        placeholder="e.g. 'As an AI language model','I apologize'"
    )

    def build_and_respond(query, model, *args):
        """Build the full prompt, query the model, and scrub the reply.

        *args layout (set by ``inputs`` below): one list of selected points
        per CheckboxGroup, then the raw custom-bans string last.
        Returns the scrubbed reply plus a token-usage footer.
        """
        *selected_lists, custom_raw = args
        selections = [pt for group in selected_lists for pt in group]

        # Assemble style prefix + selected points + the user's question.
        style_block = ORIGINAL_PREFIX + "\n" + "\n".join(selections)
        full_prompt = style_block + "\n\nNow, answer the user’s question.\n\n" + query

        # Call the OpenAI API
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": full_prompt}]
        )
        reply = resp.choices[0].message.content

        # Remove banned words (whole-word, case-insensitive) ...
        for word in DEFAULT_BANNED_WORDS:
            reply = re.sub(rf"\b{re.escape(word)}\b", "", reply, flags=re.IGNORECASE)
        # ... then the default and user-supplied regex patterns.
        custom_patterns = [p.strip() for p in custom_raw.split(",") if p.strip()]
        for pat in DEFAULT_FILTER_PATTERNS + custom_patterns:
            try:
                reply = re.sub(pat, "", reply, flags=re.IGNORECASE)
            except re.error:
                # ROBUSTNESS FIX: a malformed user-supplied pattern used to
                # raise and kill the whole response; skip it instead.
                continue

        # Collapse whitespace runs left behind by the removals.
        # NOTE: this also flattens newlines, so reply formatting is lost
        # by design.
        reply = re.sub(r"\s{2,}", " ", reply).strip()

        # Approximate usage: prompt tokens + reply tokens.
        tokens_used = count_tokens(full_prompt, model) + count_tokens(reply, model)
        return reply + f"\n\n(Tokens used: {tokens_used})"

    inputs = [user_query, model_dropdown] + point_widgets + [custom_bans]
    # BUG FIX: the handler returns a plain string, which gr.Chatbot cannot
    # render (it expects a list of message pairs); show it in a Textbox.
    output = gr.Textbox(label="AI Response", lines=10)

    user_query.submit(build_and_respond, inputs=inputs, outputs=output)
    user_query.submit(lambda: "", None, user_query)  # clear the input box

demo.launch()