Update app.py

app.py CHANGED
@@ -26,11 +26,7 @@ LICENSE = """
 As a derivate work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-2-7b-chat) by Meta,
 this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
 """
-
-<p/>
----
-Keep in mind that the examples are cached.
-"""
+
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"

@@ -55,17 +51,18 @@ class Story(Document):
     story_id = SequenceField(primary_key=True)

 def make_prompt(entry):
-    return f"### Human: YOUR INSTRUCTION HERE,ALWAYS USE A STORY,INCLUDE ASSESMENTS THROUGHOUT AND A TECHNICAL SUMMARY: {entry} ### Assistant:"
+    return f"### Human: YOUR INSTRUCTION HERE,ALWAYS USE A STORY,INCLUDE ASSESMENTS THROUGHOUT AND A TECHNICAL SUMMARY,REPLY BASED ON STORY WHEN USER ANSWERS: {entry} ### Assistant:"

 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     max_new_tokens: int = 1024,
-    temperature: float = 0.
-    top_p: float = 0.
-    top_k: int =
-    repetition_penalty: float = 1.
+    temperature: float = 0.7,
+    top_p: float = 0.8,
+    top_k: int = 40,
+    repetition_penalty: float = 1.2,
+
 ) -> Iterator[str]:
     conversation = []
     for user, assistant in chat_history:

@@ -119,7 +116,6 @@ chat_interface = gr.ChatInterface(
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
     chat_interface.render()
-    gr.Markdown(EXAMPLES)
     gr.Markdown(LICENSE)

 if __name__ == "__main__":
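The second hunk leaves off right where `generate` starts folding Gradio's `(user, assistant)` history tuples into a role-tagged conversation. For context, a minimal sketch of how that loop typically continues in the upstream llama-2-7b-chat app (only the first two lines of the loop appear in this diff, so the rest is an assumption):

```python
def build_conversation(message: str, chat_history: list[tuple[str, str]]) -> list[dict]:
    # Rebuild Gradio's (user, assistant) tuples as role-tagged chat turns.
    conversation = []
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    # The incoming message becomes the final user turn awaiting a reply.
    conversation.append({"role": "user", "content": message})
    return conversation
```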
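The new sampling defaults (temperature 0.7, top_p 0.8, top_k 40, repetition_penalty 1.2) turn on moderately diverse top-p/top-k sampling with a repetition penalty. A minimal sketch of how they would feed a streamed `transformers` generation call, assuming the upstream llama-2-7b-chat loop; the checkpoint id and helper name below are illustrative, not confirmed by this diff:

```python
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Assumption: the gated upstream checkpoint; Hub access must be granted first.
model_id = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)

def stream_reply(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Yield text chunks as they are produced instead of waiting for the full reply.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,         # new default from the diff
        top_p=0.8,               # new default from the diff
        top_k=40,                # new default from the diff
        repetition_penalty=1.2,  # new default from the diff
    )
    # model.generate blocks, so run it on a worker thread and consume the stream here.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    for text in streamer:
        yield text
```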