!pip install flask pyngrok transformers accelerate from flask import Flask, request from pyngrok import ngrok, conf import threading from transformers import pipeline # Set your ngrok authtoken conf.get_default().auth_token = "2zXMEMLtjmkTv94ApaPMVfYtYCa_WESER25tBVukfAB8ZDLU" # Initialize text-generation pipeline generator = pipeline( "text-generation", model="open-paws/8B-instruct-chat", torch_dtype="auto", device_map="auto" ) # Flask app setup app = Flask(__name__) @app.route("/") def home(): return "Welcome to the Text Generation API! Use /generate?prompt=your_text" @app.route("/generate") def generate(): prompt = request.args.get("prompt", default="Hello, how are you?", type=str) response = generator( prompt, max_new_tokens=128, do_sample=True, temperature=0.7 ) return response[0]['generated_text'] # Start ngrok tunnel public_url = ngrok.connect(5000) print(" * ngrok tunnel URL:", public_url) # Run Flask app def run(): app.run() thread = threading.Thread(target=run) thread.start()