Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -1,46 +1,73 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
# Read the API token from the environment (configured as an HF Space secret).
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Authenticated client when a token is available, anonymous client otherwise.
if HF_TOKEN:
    client = InferenceClient(token=HF_TOKEN)
else:
    client = InferenceClient()
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
def caption_image(image):
    """Generate a caption for the image."""
    # Guard clause: nothing to caption without an uploaded image.
    if image is None:
        return "π· Upload an image first!"

    try:
        # BLIP base model via the hosted Inference API.
        caption = client.image_to_text(
            image,
            model="Salesforce/blip-image-captioning-base",
        )
        return caption.generated_text
    except Exception as exc:
        # Surface API failures to the UI instead of crashing the app.
        return f"β Error: {exc}"
|
| 23 |
|
| 24 |
|
| 25 |
def answer_question(image, question: str):
    """Answer a question about the image."""
    # Guard clauses: both an image and a non-blank question are required.
    if image is None:
        return "π· Upload an image first!"
    if not question.strip():
        return "β Ask a question!"

    try:
        # ViLT VQA model returns candidates ordered by confidence.
        predictions = client.visual_question_answering(
            image=image,
            question=question,
            model="dandelin/vilt-b32-finetuned-vqa",
        )
        best = predictions[0]
        return f"π€ {best.answer} (confidence: {best.score:.1%})"
    except Exception as exc:
        # Surface API failures to the UI instead of crashing the app.
        return f"β Error: {exc}"
|
| 42 |
|
| 43 |
|
|
|
|
|
|
|
| 44 |
with gr.Blocks(title="Vision Chat") as demo:
|
| 45 |
gr.Markdown("# ποΈ Vision Chat\nUpload an image, get a caption, and ask questions about it!")
|
| 46 |
|
|
@@ -60,4 +87,5 @@ with gr.Blocks(title="Vision Chat") as demo:
|
|
| 60 |
question.submit(answer_question, inputs=[img, question], outputs=answer_out)
|
| 61 |
|
| 62 |
demo.queue()
|
|
|
|
| 63 |
demo.launch()
|
|
|
|
| 1 |
import os
|
| 2 |
+
import logging
|
| 3 |
import gradio as gr
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
|
| 6 |
+
# Configure logging so Space logs show timestamped, leveled messages.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# Get token from environment (set in HF Space secrets)
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Log only whether a token exists — never the token value itself.
# Lazy %-style args: the message is only formatted if the record is emitted.
logger.info("HF_TOKEN configured: %s", bool(HF_TOKEN))

# Authenticated client when a token is set, anonymous client otherwise.
client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
logger.info("InferenceClient initialized")
|
| 20 |
|
| 21 |
|
| 22 |
def caption_image(image):
    """Generate a caption for the image.

    Args:
        image: Image input from the Gradio component; ``None`` when the
            user has not uploaded anything yet.

    Returns:
        The generated caption string, or a user-facing message on missing
        input / API failure (errors never propagate to the UI).
    """
    # Lazy %-style logging args instead of f-strings (stdlib convention).
    logger.info("caption_image() called, image: %s", image is not None)

    if image is None:
        logger.warning("No image provided")
        return "π· Upload an image first!"

    try:
        logger.info("Calling image_to_text API...")
        result = client.image_to_text(
            image,
            model="Salesforce/blip-image-captioning-base",
        )
        # Truncate to keep log lines bounded for long captions.
        logger.info("Caption: %s", result.generated_text[:100])
        return result.generated_text
    except Exception as e:
        # logger.exception records the full traceback, unlike logger.error.
        logger.exception("API error: %s", e)
        return f"β Error: {e}"
|
| 41 |
|
| 42 |
|
| 43 |
def answer_question(image, question: str):
    """Answer a question about the image.

    Args:
        image: Image input from the Gradio component; ``None`` when the
            user has not uploaded anything yet.
        question: Free-text question about the image; may be ``None`` or
            blank when submitted empty.

    Returns:
        A formatted answer with confidence, or a user-facing message on
        missing input / API failure (errors never propagate to the UI).
    """
    # Lazy %-style logging args instead of f-strings (stdlib convention).
    logger.info(
        "answer_question() called, image: %s, question: %s",
        image is not None,
        question[:50] if question else "None",
    )

    if image is None:
        logger.warning("No image provided")
        return "π· Upload an image first!"
    # Guard against None as well as empty/whitespace-only questions;
    # the original called question.strip() directly, which raises
    # AttributeError on None even though the log line above anticipates it.
    if not question or not question.strip():
        logger.warning("No question provided")
        return "β Ask a question!"

    try:
        logger.info("Calling visual_question_answering API...")
        result = client.visual_question_answering(
            image=image,
            question=question,
            model="dandelin/vilt-b32-finetuned-vqa",
        )
        # Candidates come back ordered by confidence; take the best one.
        top = result[0]
        logger.info("Answer: %s (%.1f%%)", top.answer, top.score * 100)
        return f"π€ {top.answer} (confidence: {top.score:.1%})"
    except Exception as e:
        # logger.exception records the full traceback, unlike logger.error.
        logger.exception("API error: %s", e)
        return f"β Error: {e}"
|
| 67 |
|
| 68 |
|
| 69 |
+
logger.info("Building Gradio interface...")
|
| 70 |
+
|
| 71 |
with gr.Blocks(title="Vision Chat") as demo:
|
| 72 |
gr.Markdown("# ποΈ Vision Chat\nUpload an image, get a caption, and ask questions about it!")
|
| 73 |
|
|
|
|
| 87 |
question.submit(answer_question, inputs=[img, question], outputs=answer_out)
|
| 88 |
|
| 89 |
demo.queue()
|
| 90 |
+
logger.info("Starting Gradio server...")
|
| 91 |
demo.launch()
|