Update app.py
app.py
CHANGED
@@ -17,8 +17,6 @@ from langchain.chains.conversation.memory import ConversationalBufferWindowMemory
 
 MODEL_NAME = "mn40_as"
 
-session_token = os.environ["SESSION_TOKEN"]
-
 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
 model = get_mobilenet(width_mult=NAME_TO_WIDTH(MODEL_NAME), pretrained_name=MODEL_NAME)
 model.to(device)
@@ -28,7 +26,8 @@ cached_audio_class = "c"
 template = None
 prompt = None
 chain = None
-
+formatted_classname = "tree"
+chain =
 
 def format_classname(classname):
     return classname.capitalize()
@@ -62,60 +61,60 @@ def audio_tag(
     # Print audio tagging top probabilities
 
     label = labels[sorted_indexes[0]]
-
-
-
-
-
-
-
-
-    formatted_classname = format_classname(audio_class)
-    if cached_audio_class != formatted_classname:
-
-        cached_audio_class = formatted_classname
-
-
-
-
-
-
-
-
-
-
-
-
-        template
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    return
-
-
-
-
-
-
-
-
-
-
-
-
-
-    btn = gr.Button("Run")
-    btn.click(fn=audio_tag, inputs=[aud, inp], outputs=out)
-
-demo.launch()
+    formatted_classname = label
+    chain = construct_langchain(formatted_classname)
+    return formatted_classname
+
+def construct_langchain(audio_class):
+    if cached_audio_class != audio_class:
+        cached_audio_class = audio_class
+        prefix = f"""You are going to act as a magical tool that allows for humans to communicate with non-human entities like
+rocks, crackling fire, trees, animals, and the wind. In order to do this, we're going to provide you the human's text input for the conversation.
+The goal is for you to embody that non-human entity and converse with the human.
+
+Examples:
+
+Non-human Entity: Tree
+Human Input: Hello tree
+Tree: Hello human, I am a tree
+
+Let's begin:
+Non-human Entity: {audio_class}"""
+
+        suffix = f'''Source: {audio_class}
+Length of Audio in Seconds: 2 seconds
+Human Input: {userText}
+{audio_class} Response:'''
+        template = prefix + suffix
+
+        prompt = PromptTemplate(
+            input_variables=["history", "human_input"],
+            template=template
+        )
+
+        chatgpt_chain = LLMChain(
+            llm=OpenAI(temperature=.5, openai_api_key=session_token),
+            prompt=prompt,
+            verbose=True,
+            memory=ConversationalBufferWindowMemory(k=2, ai_prefix=audio_class),
+        )
+
+        return chatgpt_chain
+
+def predict(input, history=[]):
+    formatted_message = chain.predict(human_input=input)
+    history.append(formatted_message)
+    return formatted_message, history
+
+demo = gr.Interface(
+    audio_tag,
+    [
+        gr.Audio(source="upload", type="filepath", label="Your audio"),
+        gr.Textbox(),
+    ],
+    fn=predict,
+    inputs=["text", "state"],
+    outputs=["chatbot", "state"],
+    title="AnyChat",
+    description="Non-Human entities have many things to say, listen to them!",
+).launch(debug=True)
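
For readers unfamiliar with the pre-0.1 LangChain API this file imports, a minimal, self-contained sketch of the chain-construction pattern that the new construct_langchain follows. It is an illustration, not the Space's code: build_chain is a hypothetical name, the OPENAI_API_KEY lookup is an assumption (the commit removes SESSION_TOKEN), and history/human_input are passed through PromptTemplate placeholders, which is what input_variables=["history", "human_input"] expects.

import os

from langchain import LLMChain, OpenAI, PromptTemplate
from langchain.chains.conversation.memory import ConversationalBufferWindowMemory


def build_chain(audio_class):
    # Persona text is fixed once per detected audio class; {history} and
    # {human_input} are left as placeholders for the chain to fill on each call.
    template = (
        "You are going to act as a magical tool that allows for humans to "
        "communicate with non-human entities like rocks, crackling fire, "
        "trees, animals, and the wind. Embody that non-human entity and "
        "converse with the human.\n\n"
        f"Non-human Entity: {audio_class}\n"
        "{history}\n"
        "Human Input: {human_input}\n"
        f"{audio_class} Response:"
    )
    prompt = PromptTemplate(
        input_variables=["history", "human_input"],
        template=template,
    )
    return LLMChain(
        llm=OpenAI(temperature=0.5, openai_api_key=os.environ["OPENAI_API_KEY"]),  # assumed key source
        prompt=prompt,
        verbose=True,
        # Remember only the last two exchanges; label AI turns with the entity name.
        memory=ConversationalBufferWindowMemory(k=2, ai_prefix=audio_class),
    )


chain = build_chain("Tree")
print(chain.predict(human_input="Hello tree"))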
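Likewise, a sketch of the Gradio 3.x chat wiring the new gr.Interface call points at, assuming a single fn drives the interface and using the 3.x string shortcuts "text", "state", and "chatbot". Here respond is a hypothetical stand-in for chain.predict from the code above.

import gradio as gr


def respond(message, history):
    history = history or []
    reply = f"Tree: I heard '{message}'"  # placeholder for chain.predict(human_input=message)
    history.append((message, reply))
    # "chatbot" renders the (user, bot) pairs; "state" carries them to the next call.
    return history, history


demo = gr.Interface(
    fn=respond,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
    title="AnyChat",
    description="Non-Human entities have many things to say, listen to them!",
)
demo.launch(debug=True)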