Update app.py
app.py CHANGED
@@ -29,19 +29,28 @@ this demo is governed by the original [license](https://huggingface.co/spaces/hu
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-
-model_id = "meta-llama/Llama-2-7b-hf"
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=False,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=bnb_config)
-model = PeftModel.from_pretrained(base_model, "ranamhamoud/storytell")
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-tokenizer.pad_token = tokenizer.eos_token
+if torch.cuda.is_available():
+    # Story-generation model: 4-bit NF4-quantized Llama-2 base plus the LoRA adapter.
+    model_id = "meta-llama/Llama-2-7b-hf"
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=False,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.bfloat16
+    )
+    base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", quantization_config=bnb_config)
+    storytell_model = PeftModel.from_pretrained(base_model, "ranamhamoud/storytell")
+    storytell_tokenizer = AutoTokenizer.from_pretrained(model_id)
+    storytell_tokenizer.pad_token = storytell_tokenizer.eos_token  # fixed: `tokenizer` is undefined at this point
+
+
+    # Editing model: unquantized fp16 Llama-2 chat model.
+    editing_model_id = "meta-llama/Llama-2-7b-chat-hf"
+    editing_model = AutoModelForCausalLM.from_pretrained(editing_model_id, torch_dtype=torch.float16, device_map="auto")
+    editing_tokenizer = AutoTokenizer.from_pretrained(editing_model_id)  # fixed: was loaded from `model_id`
+    editing_tokenizer.use_default_system_prompt = False
 
+
 # MongoDB Connection
 PASSWORD = os.environ.get("MONGO_PASS")
 connect(host=f"mongodb+srv://ranamhammoud11:{PASSWORD}@stories.zf5v52a.mongodb.net/")
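The hunk above sets up two pipelines: a 4-bit NF4-quantized Llama-2 base carrying the ranamhamoud/storytell LoRA adapter, and an unquantized fp16 Llama-2 chat model for editing. A minimal sketch for sanity-checking both pairs after loading; smoke_test is a hypothetical helper, not part of app.py:

import torch

def smoke_test(model, tokenizer, prompt: str = "Once upon a time") -> None:
    # Encode the prompt on the model's device, greedily generate a short
    # continuation, and print the decoded text.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(output[0], skip_special_tokens=True))

# smoke_test(storytell_model, storytell_tokenizer)
# smoke_test(editing_model, editing_tokenizer)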
@@ -74,6 +83,14 @@ def generate(
     repetition_penalty: float = 1.0,
+    model_choice: str = "Storytell",  # fixed: added; the current value of the "Choose Model" dropdown
 ) -> Iterator[str]:
     conversation = []
+    # Route the request to the model/tokenizer pair selected in the UI.
+    if model_choice == "Storytell":
+        model = storytell_model
+        tokenizer = storytell_tokenizer
+    else:
+        model = editing_model
+        tokenizer = editing_tokenizer
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": make_prompt(message)})
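For model_choice to reach generate at all, gr.ChatInterface appends the current value of every additional_inputs component to the function's arguments, after the message and the chat history; the trailing parameter added to the signature above matches that order. A self-contained sketch of the mechanism, with a toy echo function standing in for generate:

import gradio as gr

def echo(message, history, model_choice):
    # The dropdown's current value arrives as the third positional argument.
    return f"[{model_choice}] {message}"

demo = gr.ChatInterface(
    fn=echo,
    additional_inputs=[gr.Dropdown(choices=["Storytell", "HF Meta Llama 7b Chat"], value="Storytell", label="Choose Model")],
)

if __name__ == "__main__":
    demo.launch()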
@@ -116,6 +133,7 @@ def generate(
 chat_interface = gr.ChatInterface(
     fn=generate,
     stop_btn=None,
+    additional_inputs=[gr.Dropdown(choices=["Storytell", "HF Meta Llama 7b Chat"], value="Storytell", label="Choose Model")],
     examples=[
         ["Can you explain briefly to me what is the Python programming language?"],
         ["Could you please provide an explanation about the concept of recursion?"],
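gr.ChatInterface has no inputs parameter, and the Dropdown's option list is set through choices; extra widgets go through additional_inputs, as in the hunk above. One caveat: if this gr.ChatInterface call already passes additional_inputs further down (the usual sampling sliders are not visible in this hunk), the keyword cannot be repeated, and the dropdown should instead be appended to that existing list, roughly as in this sketch (the slider comment marks assumed, unshown code):

model_selector = gr.Dropdown(choices=["Storytell", "HF Meta Llama 7b Chat"], value="Storytell", label="Choose Model")

chat_interface = gr.ChatInterface(
    fn=generate,
    stop_btn=None,
    additional_inputs=[
        # ... any existing sliders (max_new_tokens, temperature, ...) stay first ...
        model_selector,  # last, so model_choice remains the final parameter of generate()
    ],
    examples=[
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Could you please provide an explanation about the concept of recursion?"],
    ],
)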