Upload app.py with huggingface_hub
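The commit title describes how this revision reached the Space: app.py was pushed with the huggingface_hub client rather than edited in the browser. A minimal sketch of that kind of upload, assuming a hypothetical Space id and token (neither value appears on this page):

from huggingface_hub import HfApi

# Sketch only: the repo_id and token are placeholders, not values from this commit.
api = HfApi(token="hf_xxx")
api.upload_file(
    path_or_fileobj="app.py",        # local file to push
    path_in_repo="app.py",           # destination path inside the Space repo
    repo_id="your-user/your-space",  # hypothetical Space id
    repo_type="space",               # target a Space rather than a model repo
    commit_message="Upload app.py with huggingface_hub",
)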
app.py CHANGED
@@ -1,21 +1,24 @@
 
 import gradio as gr
 import time
-
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
 
 MODEL_ID = "akshaynayaks9845/rml-ai-phi1_5-rml-100k"
 
-def load_pipeline():
+_PIPE = None
+_ERR = None
+
+def get_pipeline():
+    global _PIPE, _ERR
+    if _PIPE is not None or _ERR is not None:
+        return _PIPE, _ERR
     try:
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
         model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
-        pipe = TextGenerationPipeline(model=model, tokenizer=tokenizer)
-        return pipe
+        _PIPE = TextGenerationPipeline(model=model, tokenizer=tokenizer, device=-1)
     except Exception as e:
-        return str(e)
-
-pipe_or_err = load_pipeline()
+        _ERR = str(e)
+    return _PIPE, _ERR
 
 SAMPLES = [
     "What is artificial intelligence?",
@@ -25,10 +28,11 @@ SAMPLES = [
 
 def generate_response(prompt, max_new_tokens=128, temperature=0.2):
     start = time.time()
-    if isinstance(pipe_or_err, str):
-        return f"Model load error: {pipe_or_err}"
+    pipe, err = get_pipeline()
+    if err is not None:
+        return "Model load error: " + err
     try:
-        outputs = pipe_or_err(
+        outputs = pipe(
             prompt,
             max_new_tokens=int(max_new_tokens),
             do_sample=bool(temperature and temperature > 0),
@@ -38,14 +42,11 @@ def generate_response(prompt, max_new_tokens=128, temperature=0.2):
             truncation=True,
         )
         text = outputs[0]["generated_text"]
-        # Return only continuation if the model echoes the prompt
        reply = text[len(prompt):].strip() if text.startswith(prompt) else text
        elapsed = int((time.time() - start) * 1000)
-        return f"""{reply}
-
-(⏱️ {elapsed} ms)"""
+        return reply + "\n\n(⏱️ " + str(elapsed) + " ms)"
    except Exception as e:
-        return f"Error: {e}"
+        return "Error: " + str(e)
 
 with gr.Blocks(title="RML-AI Demo") as demo:
     gr.Markdown('''
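The hunks above replace eager, import-time loading (the old module-level pipe_or_err = load_pipeline()) with a lazy singleton: get_pipeline() loads the model on the first request, caches either the pipeline or the error string in module globals, and returns the cached pair on every later call, so a failed download is reported once per process instead of retried on every request. Passing device=-1 pins the pipeline to CPU, which suits a free Space. A standalone sketch of the same caching pattern, with a cheap stand-in for the real model load:

import time

_PIPE = None
_ERR = None

def _expensive_load():
    # Stand-in for AutoModelForCausalLM.from_pretrained plus pipeline construction.
    time.sleep(1.0)
    return object()

def get_pipeline():
    # Resolve at most once per process; a cached error short-circuits too.
    global _PIPE, _ERR
    if _PIPE is not None or _ERR is not None:
        return _PIPE, _ERR
    try:
        _PIPE = _expensive_load()
    except Exception as e:
        _ERR = str(e)
    return _PIPE, _ERR

start = time.time()
get_pipeline()  # slow: performs the load
print(f"first call:  {time.time() - start:.3f}s")

start = time.time()
get_pipeline()  # fast: returns the cached pair
print(f"second call: {time.time() - start:.3f}s")

The transformers.pipeline("text-generation", model=MODEL_ID) factory would build an equivalent object; constructing TextGenerationPipeline directly, as the diff does, simply reuses the model and tokenizer that were already loaded.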