Spaces:

ake178178
/

beyoureyes

Runtime error

File size: 2,418 Bytes

26caad1
 
 
 
 
 
 
 
d2b41a1
 
 
26caad1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2b41a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26caad1
d2b41a1
 
 
26caad1
d2b41a1
 
 
 
 
26caad1
d2b41a1
 
 
26caad1
d2b41a1
 
 
 
26caad1
d2b41a1

import os
import subprocess
import time

import cv2
import streamlit as st
import torch
from gtts import gTTS
from transformers import AutoProcessor, AutoModelForImageClassification

# Remind the user to check the camera permission manually; the message points
# to the relevant settings pages on Windows and macOS/iOS.
st.warning("请确保已允许应用访问您的摄像头。对于 Windows 用户，请检查 [设置 -> 隐私 -> 摄像头]。对于 macOS/iOS 用户，请检查 [系统偏好设置 -> 安全性与隐私 -> 摄像头]。")

# Load the Hugging Face model
@st.cache_resource
def load_model():
    """Load the image-classification checkpoint used by the app.

    The function is wrapped in ``st.cache_resource``, so Streamlit keeps the
    returned objects and hands them back on later calls instead of loading
    them again.

    Returns:
        A ``(processor, model)`` tuple, both created from the
        ``microsoft/resnet-50`` checkpoint.
    """
    checkpoint = "microsoft/resnet-50"
    return (
        AutoProcessor.from_pretrained(checkpoint),
        AutoModelForImageClassification.from_pretrained(checkpoint),
    )

# Fetch the processor/model pair; load_model is decorated with
# st.cache_resource, so the pair is created by the first call and reused after.
processor, model = load_model()

# Set the page title
st.title("帮助盲人识别物体的应用")

# Open the camera and take a photo every 10 seconds
st.header("点击下方按钮打开摄像头拍照并识别物体")
# `run` holds the button's return value; the capture section of the script is
# guarded by it.
run = st.button('打开摄像头并开始识别')

# Seconds to wait between two consecutive capture/recognition cycles.
CAPTURE_INTERVAL_SECONDS = 10

# Capture loop: executes only when the start button reported a click.
if run:
    st.text("正在打开摄像头，请稍等...")
    camera = cv2.VideoCapture(0)

    # try/finally guarantees the capture device is released on every exit
    # path: a failed read, an exception raised by the model or by gTTS, or the
    # script being interrupted while the loop is running.
    try:
        # Check that the camera was opened successfully
        if not camera.isOpened():
            st.error("无法打开摄像头，请检查摄像头权限设置")
        else:
            # Placeholders that are overwritten on every cycle, so the page
            # shows only the latest frame/result instead of growing forever.
            frame_slot = st.empty()
            label_slot = st.empty()
            playback_slot = st.empty()

            while True:
                # NOTE(review): some capture backends buffer frames, so the
                # frame read right after the sleep below may be slightly
                # stale — confirm on the target device.
                ret, frame = camera.read()
                if not ret:
                    st.error("无法读取摄像头画面")
                    break

                # Show the current camera frame (OpenCV delivers BGR data)
                frame_slot.image(frame, channels="BGR")

                # Keep a copy of the latest frame on disk; this block does not
                # read the file back.
                cv2.imwrite("captured_image.jpg", frame)

                # Convert the frame to RGB and build the model input
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                inputs = processor(images=image, return_tensors="pt")

                # Run the classifier without tracking gradients
                with torch.no_grad():
                    logits = model(**inputs).logits
                predicted_label = torch.argmax(logits, dim=1).item()

                # Map the predicted class index to its label text
                label = model.config.id2label[predicted_label]
                label_slot.write(f"识别到的物体: {label}")

                # Turn the label into speech
                tts = gTTS(text=f"The object detected is {label}", lang='en')
                tts.save("output.mp3")

                # Play the clip with mpg321. An argument list avoids going
                # through a shell, and a missing or failing player is reported
                # to the user instead of being silently ignored.
                try:
                    playback = subprocess.run(["mpg321", "output.mp3"], check=False)
                    played = playback.returncode == 0
                except FileNotFoundError:
                    played = False

                if played:
                    playback_slot.empty()
                else:
                    playback_slot.warning("无法播放语音，请确认已安装 mpg321")

                # Wait AFTER announcing, so the spoken label always describes
                # the frame that was just captured rather than one taken
                # CAPTURE_INTERVAL_SECONDS earlier.
                time.sleep(CAPTURE_INTERVAL_SECONDS)
    finally:
        # Release the camera
        camera.release()