KGSAGAR committed on
Commit
896a3c2
·
verified ·
1 Parent(s): 6cb1f28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -24
app.py CHANGED
@@ -1,30 +1,35 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  from peft import PeftModel
 
4
  import re
5
  import torch
6
 
 
 
 
 
7
  # Model and tokenizer loading (outside the respond function)
8
  try:
9
- # Load the tokenizer
10
  tokenizer = AutoTokenizer.from_pretrained("sarvamai/sarvam-1")
11
-
12
- # Load the base model
13
  base_model = AutoModelForCausalLM.from_pretrained("sarvamai/sarvam-1")
14
-
15
- # Load the PEFT model
16
  peft_model = PeftModel.from_pretrained(base_model, "KGSAGAR/Sarvam-1-text-normalization-3r")
17
-
18
- # Merge and unload the PEFT model into the base model
19
  peft_model = peft_model.merge_and_unload()
20
-
21
- print("Model loaded successfully!")
22
  except Exception as e:
23
  print(f"Error loading model: {e}")
24
  tokenizer = None
 
25
  peft_model = None
26
 
27
- def respond(message, history, system_message, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
28
  """
29
  Generates a response based on the user message and history using the provided PEFT model.
30
  Args:
@@ -34,12 +39,13 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
34
  max_tokens (int): The maximum number of tokens to generate.
35
  temperature (float): The temperature parameter for generation.
36
  top_p (float): The top_p parameter for nucleus sampling.
37
- Returns:
38
- str: The generated response.
39
  """
40
- global tokenizer, peft_model # Access global variables
41
  if tokenizer is None or peft_model is None:
42
- return "Model loading failed. Please check the logs."
 
43
 
44
  # Construct the prompt
45
  prompt = system_message
@@ -63,7 +69,9 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
63
  do_sample=True # Enable sampling for more diverse outputs
64
  )
65
  except Exception as e:
66
- return f"Generation error: {e}"
 
 
67
 
68
  # Decode the generated tokens
69
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -74,29 +82,41 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
74
  Extracts and returns content between <user>...</user> tags in the given text.
75
  If multiple such sections exist, their contents are concatenated.
76
  """
77
- pattern = re.compile(r'<user>(.*?)</user>|output:', re.IGNORECASE)
78
  matches = re.findall(pattern, text, re.DOTALL)
79
- extracted_content = '\n'.join(match.strip() for match in matches if match)
80
  return extracted_content
81
 
82
  # Extract the normalized text
83
  normalized_text = extract_user_content(generated_text)
84
 
85
- return normalized_text
 
 
 
 
 
 
86
 
87
- # Gradio interface setup
 
 
88
  demo = gr.ChatInterface(
89
  respond,
90
  additional_inputs=[
91
- gr.Textbox(
92
- value="Take the user input in Hindi language and normalize specific entities, including: Dates (any format), Currencies, Scientific units. Example input: 2012–13 में रक्षा सेवाओं के लिए 1,93,407 करोड़ रुपए का प्रावधान किया गया था, जबकि 2011–2012 में यह राशि 1,64,415 करोड़ थी. Example output: ट्वेन्टी ट्वेल्व थर्टीन में रक्षा सेवाओं के लिए एक लाख तिरानवे हजार चार सौ सात करोड़ रुपए का प्रावधान किया गया था, जबकि ट्वेन्टी इलेवन ट्वेल्व में यह राशि एक लाख चौसठ हजार चार सौ पंद्रह करोड़ थी. Only provide the normalized output with utmost accuracy.",
93
- label="System message"
94
- ),
95
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
96
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
97
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 
 
 
 
 
 
98
  ],
99
  )
100
 
 
101
  if __name__ == "__main__":
102
  demo.launch()
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  from peft import PeftModel
4
+ from huggingface_hub import InferenceClient
5
  import re
6
  import torch
7
 
8
+ """
9
+ For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
10
+ """
11
+
12
  # Model and tokenizer loading (outside the respond function)
13
  try:
 
14
  tokenizer = AutoTokenizer.from_pretrained("sarvamai/sarvam-1")
 
 
15
  base_model = AutoModelForCausalLM.from_pretrained("sarvamai/sarvam-1")
 
 
16
  peft_model = PeftModel.from_pretrained(base_model, "KGSAGAR/Sarvam-1-text-normalization-3r")
 
 
17
  peft_model = peft_model.merge_and_unload()
18
+ print("Model loaded successfully!") # Add this line
 
19
  except Exception as e:
20
  print(f"Error loading model: {e}")
21
  tokenizer = None
22
+ base_model = None
23
  peft_model = None
24
 
25
+ def respond(
26
+ message,
27
+ history,
28
+ system_message,
29
+ max_tokens,
30
+ temperature,
31
+ top_p,
32
+ ):
33
  """
34
  Generates a response based on the user message and history using the provided PEFT model.
35
  Args:
 
39
  max_tokens (int): The maximum number of tokens to generate.
40
  temperature (float): The temperature parameter for generation.
41
  top_p (float): The top_p parameter for nucleus sampling.
42
+ Yields:
43
+ str: The generated response up to the current token.
44
  """
45
+ global tokenizer, peft_model #access global variables
46
  if tokenizer is None or peft_model is None:
47
+ yield "Model loading failed. Please check the logs."
48
+ return
49
 
50
  # Construct the prompt
51
  prompt = system_message
 
69
  do_sample=True # Enable sampling for more diverse outputs
70
  )
71
  except Exception as e:
72
+ yield f"Generation error: {e}"
73
+ return
74
+
75
 
76
  # Decode the generated tokens
77
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
82
  Extracts and returns content between <user>...</user> tags in the given text.
83
  If multiple such sections exist, their contents are concatenated.
84
  """
85
+ pattern = r'<user>(.*?)</user>'
86
  matches = re.findall(pattern, text, re.DOTALL)
87
+ extracted_content = '\n'.join(match.strip() for match in matches)
88
  return extracted_content
89
 
90
  # Extract the normalized text
91
  normalized_text = extract_user_content(generated_text)
92
 
93
+ # Stream the response token by token
94
+ response = ""
95
+ for token in normalized_text.split():
96
+ response += token + " "
97
+ yield response.strip()
98
+
99
+
100
 
101
+ """
102
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
103
+ """
104
  demo = gr.ChatInterface(
105
  respond,
106
  additional_inputs=[
107
+ gr.Textbox(value="Take the user input in Hindi language and normalize specific entities, Only including: Dates (any format) Currencies Scientific units, <Example> Exampleinput : 2012–13 मे�� रक्षा सेवाओं के लिए 1,93,407 करोड़ रुपए का प्रावधान किया गया था, जबकि 2011–2012 में यह राशि 1,64,415 करोइ़ थी, Exampleoutput: ट्वेन्टी ट्वेल्व फिफ्टीन में रक्षा सेवाओं के लिए वन करोड़ निनेटी थ्री थाउजेंड फोर हंड्रेड सेवन करोड़ रुपए का प्रावधान किया गया था, जबकि ट्वेन्टी एलेवन ट्वेल्व में यह राशि वन करोड़ सिक्स्टी फोर थाउजेंड फोर हंड्रेड फिफ्टीन करोड़ थी </Example>, Only provide the normalized output with atmost accuracy <user> input:", label="System message"),
 
 
 
108
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
109
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
110
+ gr.Slider(
111
+ minimum=0.1,
112
+ maximum=1.0,
113
+ value=0.95,
114
+ step=0.05,
115
+ label="Top-p (nucleus sampling)",
116
+ ),
117
  ],
118
  )
119
 
120
+
121
  if __name__ == "__main__":
122
  demo.launch()