Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,43 +18,24 @@ except Exception as e:
|
|
| 18 |
base_model = None
|
| 19 |
peft_model = None
|
| 20 |
|
| 21 |
-
def respond(
|
| 22 |
-
message,
|
| 23 |
-
history,
|
| 24 |
-
system_message,
|
| 25 |
-
max_tokens,
|
| 26 |
-
temperature,
|
| 27 |
-
top_p,
|
| 28 |
-
):
|
| 29 |
-
"""
|
| 30 |
-
Generates a complete response based on the user message and history using the provided PEFT model.
|
| 31 |
-
Args:
|
| 32 |
-
message (str): The user's input message.
|
| 33 |
-
history (list of tuples): A list containing tuples of (user_message, assistant_response).
|
| 34 |
-
system_message (str): The system's initial message or prompt.
|
| 35 |
-
max_tokens (int): The maximum number of tokens to generate.
|
| 36 |
-
temperature (float): The temperature parameter for generation.
|
| 37 |
-
top_p (float): The top_p parameter for nucleus sampling.
|
| 38 |
-
Returns:
|
| 39 |
-
str: The complete generated response.
|
| 40 |
-
"""
|
| 41 |
global tokenizer, peft_model
|
|
|
|
| 42 |
if tokenizer is None or peft_model is None:
|
| 43 |
return "Model loading failed. Please check the logs."
|
| 44 |
-
|
| 45 |
# Construct the prompt
|
| 46 |
prompt = system_message
|
| 47 |
for user_msg, assistant_msg in history:
|
| 48 |
if user_msg:
|
| 49 |
-
prompt += f"
|
| 50 |
if assistant_msg:
|
| 51 |
-
prompt += f"
|
| 52 |
-
prompt += f"
|
| 53 |
-
|
| 54 |
# Tokenize the input prompt
|
| 55 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
|
| 56 |
-
|
| 57 |
-
# Generate the complete output
|
| 58 |
try:
|
| 59 |
outputs = peft_model.generate(
|
| 60 |
**inputs,
|
|
@@ -63,43 +44,24 @@ def respond(
|
|
| 63 |
top_p=top_p,
|
| 64 |
do_sample=True
|
| 65 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
except Exception as e:
|
| 67 |
return f"Generation error: {e}"
|
| 68 |
|
| 69 |
-
# Decode the generated tokens
|
| 70 |
-
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 71 |
-
|
| 72 |
-
# Extract content between <user>...</user> tags
|
| 73 |
-
def extract_user_content(text):
|
| 74 |
-
"""
|
| 75 |
-
Extracts and returns content between <user>...</user> tags in the given text.
|
| 76 |
-
If multiple such sections exist, their contents are concatenated.
|
| 77 |
-
"""
|
| 78 |
-
pattern = r'<user>(.*?)</user>'
|
| 79 |
-
matches = re.findall(pattern, text, re.DOTALL)
|
| 80 |
-
extracted_content = '\n'.join(match.strip() for match in matches)
|
| 81 |
-
return extracted_content
|
| 82 |
-
|
| 83 |
-
# Extract and return the complete normalized text
|
| 84 |
-
normalized_text = extract_user_content(generated_text)
|
| 85 |
-
return normalized_text.strip()
|
| 86 |
-
|
| 87 |
demo = gr.ChatInterface(
|
| 88 |
respond,
|
| 89 |
additional_inputs=[
|
| 90 |
-
gr.Textbox(
|
| 91 |
-
value="Take the user input in Hindi language and normalize specific entities, Only including: Dates (any format) Currencies Scientific units, <Example> Exampleinput : 2012–13 में रक्षा सेवाओं के लिए 1,93,407 करोड़ रुपए का प्रावधान किया गया था, जबकि 2011–2012 में यह राशि 1,64,415 करोइ़ थी, Exampleoutput: ट्वेन्टी ट्वेल्व फिफ्टीन में रक्षा सेवाओं के लिए वन करोड़ निनेटी थ्री थाउजेंड फोर हंड्रेड सेवन करोड़ रुपए का प्रावधान किया गया था, जबकि ट्वेन्टी एलेवन ट्वेल्व में यह राशि वन करोड़ सिक्स्टी फोर थाउजेंड फोर हंड्रेड फिफ्टीन करोड़ थी </Example>, Only provide the normalized output with atmost accuracy <user> input:",
|
| 92 |
-
label="System message"
|
| 93 |
-
),
|
| 94 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 95 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 96 |
-
gr.Slider(
|
| 97 |
-
minimum=0.1,
|
| 98 |
-
maximum=1.0,
|
| 99 |
-
value=0.95,
|
| 100 |
-
step=0.05,
|
| 101 |
-
label="Top-p (nucleus sampling)",
|
| 102 |
-
),
|
| 103 |
],
|
| 104 |
)
|
| 105 |
|
|
|
|
| 18 |
base_model = None
|
| 19 |
peft_model = None
|
| 20 |
|
| 21 |
+
def respond(message, history, system_message, max_tokens, temperature, top_p):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
global tokenizer, peft_model
|
| 23 |
+
|
| 24 |
if tokenizer is None or peft_model is None:
|
| 25 |
return "Model loading failed. Please check the logs."
|
| 26 |
+
|
| 27 |
# Construct the prompt
|
| 28 |
prompt = system_message
|
| 29 |
for user_msg, assistant_msg in history:
|
| 30 |
if user_msg:
|
| 31 |
+
prompt += f"\nUser: {user_msg}"
|
| 32 |
if assistant_msg:
|
| 33 |
+
prompt += f"\nAssistant: {assistant_msg}"
|
| 34 |
+
prompt += f"\nUser: {message}"
|
| 35 |
+
|
| 36 |
# Tokenize the input prompt
|
| 37 |
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
|
| 38 |
+
|
|
|
|
| 39 |
try:
|
| 40 |
outputs = peft_model.generate(
|
| 41 |
**inputs,
|
|
|
|
| 44 |
top_p=top_p,
|
| 45 |
do_sample=True
|
| 46 |
)
|
| 47 |
+
|
| 48 |
+
# Get the last message from the generated text
|
| 49 |
+
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 50 |
+
lines = generated_text.split('\n')
|
| 51 |
+
last_line = lines[-1] if lines else ""
|
| 52 |
+
|
| 53 |
+
return last_line.strip()
|
| 54 |
+
|
| 55 |
except Exception as e:
|
| 56 |
return f"Generation error: {e}"
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
demo = gr.ChatInterface(
|
| 59 |
respond,
|
| 60 |
additional_inputs=[
|
| 61 |
+
gr.Textbox(label="System message"),
|
|
|
|
|
|
|
|
|
|
| 62 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 63 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 64 |
+
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
],
|
| 66 |
)
|
| 67 |
|