Spaces:

Studiobotxyz
/

StudioGPT2

Sleeping

App Files Community

Studiobotxyz committed on Jan 7, 2024

Commit

4ab0120

1 Parent(s): 67ee73b

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -39

app.py CHANGED Viewed

@@ -1,14 +1,11 @@
 import os
-os.system("pip install flask ctransformers gradio")
 import time
 import requests
 from tqdm import tqdm
-from flask import Flask, request, jsonify
 import ctransformers
 import gradio as gr
-app = Flask(__name__)
 if not os.path.isfile('llama-2-7b.ggmlv3.q4_K_S.bin'):
     print("Downloading Model from HuggingFace")
     url = "https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_K_S.bin"
@@ -31,16 +28,6 @@ config.config.stop = ["\n"]
 llm = ctransformers.AutoModelForCausalLM.from_pretrained('./llama-2-7b.ggmlv3.q4_K_S.bin', config=config)
 print("Loaded model")
-def time_it(func):
-    def wrapper(*args, **kwargs):
-        start_time = time.time()
-        result = func(*args, **kwargs)
-        end_time = time.time()
-        execution_time = end_time - start_time
-        print(f"Function '{func.__name__}' took {execution_time:.6f} seconds to execute.")
-        return result
-    return wrapper
 def complete(prompt, stop=["User", "Assistant"]):
     tokens = llm.tokenize(prompt)
     token_count = 0
@@ -58,31 +45,9 @@ def complete(prompt, stop=["User", "Assistant"]):
     print('\n')
     return [output, token_count]
-@app.route('/generate', methods=['POST'])
-def generate_response():
-    data = request.get_json()
-    question = data.get('question', '')
-    start_time = time.time()
     output, token_count = complete(f'User: {question}. Can you please answer this as informatively but concisely as possible.\nAssistant: ')
-    end_time = time.time()
-    execution_time = end_time - start_time
-    response = {
-        'output': output,
-        'token_count': token_count,
-        'execution_time': execution_time,
-        'tokens_per_second': token_count / execution_time
-    }
-    return jsonify(response)
-def greet(name):
-    _, token_count = complete(f'User: {name}. Can you please answer this as informatively but concisely as possible.\nAssistant: ')
-    return f"Response: {name} | Tokens: {token_count}"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
 iface.launch()
-if __name__ == '__main__':
-    app.run(debug=True)

 import os
+os.system("pip install ctransformers gradio")
 import time
 import requests
 from tqdm import tqdm
 import ctransformers
 import gradio as gr
 if not os.path.isfile('llama-2-7b.ggmlv3.q4_K_S.bin'):
     print("Downloading Model from HuggingFace")
     url = "https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_K_S.bin"
 llm = ctransformers.AutoModelForCausalLM.from_pretrained('./llama-2-7b.ggmlv3.q4_K_S.bin', config=config)
 print("Loaded model")
 def complete(prompt, stop=["User", "Assistant"]):
     tokens = llm.tokenize(prompt)
     token_count = 0
     print('\n')
     return [output, token_count]
+def greet(question):
     output, token_count = complete(f'User: {question}. Can you please answer this as informatively but concisely as possible.\nAssistant: ')
+    return f"Response: {output} | Tokens: {token_count}"
+iface = gr.Interface(fn=greet, inputs="text", outputs="text", live=True)
 iface.launch()