Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,21 +1,17 @@
|
|
| 1 |
from transformers import pipeline
|
| 2 |
import gradio as gr
|
| 3 |
-
import os
|
| 4 |
|
| 5 |
-
|
| 6 |
-
HF_TOKEN = os.environ.get("llama")
|
| 7 |
-
|
| 8 |
-
# Load Gemma model
|
| 9 |
pipe = pipeline(
|
| 10 |
"text-generation",
|
| 11 |
model="tiiuae/falcon-7b-instruct",
|
| 12 |
-
# use_auth_token=HF_TOKEN, # required for gated models
|
| 13 |
device_map="auto"
|
| 14 |
)
|
| 15 |
|
| 16 |
def chat_fn(prompt):
|
| 17 |
result = pipe(prompt, max_new_tokens=200)
|
| 18 |
-
return result[0]["generated_text"]
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
| 1 |
from transformers import pipeline
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
|
| 4 |
+
# Load Falcon model (open, no token required)
|
|
|
|
|
|
|
|
|
|
| 5 |
pipe = pipeline(
|
| 6 |
"text-generation",
|
| 7 |
model="tiiuae/falcon-7b-instruct",
|
|
|
|
| 8 |
device_map="auto"
|
| 9 |
)
|
| 10 |
|
| 11 |
def chat_fn(prompt):
|
| 12 |
result = pipe(prompt, max_new_tokens=200)
|
| 13 |
+
return {"response": result[0]["generated_text"]} # JSON-friendly dict
|
| 14 |
|
| 15 |
+
# gr.Interface serves chat_fn through both the web UI and Gradio's built-in API endpoint (gr.Request is not required for this)
|
| 16 |
+
app = gr.Interface(fn=chat_fn, inputs="text", outputs="text") # keep text
|
| 17 |
+
app.launch(server_name="0.0.0.0", server_port=7860)
|