Spaces:

JunyiAcademy
/

question_image_to_text

Sleeping

App Files Files Community

youngtsai committed on Sep 12, 2024

Commit

6914802

1 Parent(s): 5710160

with gr.Tab("計算紙分析"):

Browse files

Files changed (1) hide show

app.py +99 -12

app.py CHANGED Viewed

@@ -11,6 +11,8 @@ from storage_service import GoogleCloudStorage
 import csv
 import io
 import fitz  # PyMuPDF
@@ -46,18 +48,22 @@ sheets_client = gspread.service_account_from_dict(GSHEET_KEY_DICT)
 CSV_DATA = []
 # 函数定义
-def upload_image_to_gcs(image_path, bucket):
-    # Extract the file name from the path and create a unique filename
-    original_filename = os.path.basename(image_path)
-    unique_filename = f"{int(time.time())}_{original_filename}"
     blob = bucket.blob(unique_filename)
-    # Open the file and upload its contents to GCS
-    with open(image_path, "rb") as image_file:
-        blob.upload_from_file(image_file)
     blob.make_public()
     print("======upload_image_to_gcs=====")
-    print(f"File {original_filename} uploaded to {unique_filename} in GCS.")
     return blob.public_url
 def process_image(image_url):
@@ -226,7 +232,7 @@ def create_csv(processed_data):
     # 设定一个可写的目录路径
     writable_directory = "/tmp/csv_files"
     if not os.path.exists(writable_directory):
-        os.makedirs(writable_directory)  # 如果目录不存在，创建它
     timestamp = int(time.time())
     file_name = f"csv_{timestamp}.csv"
@@ -285,7 +291,7 @@ def process_image_to_data(password, images):
         print("image_url:", image_url)
     question_count = len(processed_data)
-    result = f"圖片處理完成，總共完成 {question_count} 道題目"
     csv_file_path = create_csv(processed_data)
     return processed_data, result, csv_file_path
@@ -521,11 +527,85 @@ def process_qid_to_data(password, q_id):
     return processed_data, result, csv_file_path
 # Gradio界面
 with gr.Blocks() as demo:
     with gr.Row():
         password_input = gr.Textbox(label="密碼", type="password", elem_id="password_input")
     with gr.Tab("Junyi_Q_ID", elem_id="junyi_q_id_tab"):
         with gr.Row():
             gr.Markdown("## Junyi Q_ID")
@@ -599,12 +679,13 @@ with gr.Blocks() as demo:
         with gr.Accordion(open=False):
             with gr.Row():
                 pdf_result_table = gr.Dataframe(
-                    headers=["圖片URL", "文字", "題號", "題目", "選項1", "選項2", "選項3", "選項4", "答案", "提示1", "提示2", "提示3", "提示4", "提示5", "Perseus JSON"],
                     column_widths=[10, 10, 5, 20, 4, 4, 4, 4, 4,4,4,4,4,4, 10],
                     wrap=True
                 )
     submit_button.click(
         fn=process_image_to_data,
         inputs=[password_input, image_input],
@@ -653,6 +734,12 @@ with gr.Blocks() as demo:
         outputs=[junyi_q_id_question_markdown]
     )
-demo.launch()

 import csv
 import io
 import fitz  # PyMuPDF
+import base64
+from PIL import Image
 CSV_DATA = []
 # 函数定义
def upload_image_to_gcs(image_data, bucket):
    """Upload an image to GCS, make it public, and return its public URL.

    Args:
        image_data: Either a readable binary file-like object (for example
            ``io.BytesIO``) holding the image bytes, or a filesystem path
            to an image file.
        bucket: Bucket-like object exposing ``blob(name)``. The returned
            blob must provide ``upload_from_file``, ``make_public`` and
            ``public_url``.

    Returns:
        The ``public_url`` attribute of the uploaded blob.

    Raises:
        OSError: If ``image_data`` is a path that cannot be opened.
    """
    # Imported locally so this block does not depend on file-level imports
    # that may not exist yet.
    import mimetypes
    import uuid

    # A timestamp alone collides when two uploads land in the same second
    # (the later one would overwrite the earlier blob), so a random suffix
    # is appended to keep every object name unique.
    unique_filename = f"{int(time.time())}_{uuid.uuid4().hex}_image.jpg"
    blob = bucket.blob(unique_filename)

    if hasattr(image_data, "read"):
        # Any file-like object is uploaded directly, not only io.BytesIO.
        # A buffer that was just written to has its cursor at the end;
        # rewind it so the whole payload is uploaded instead of zero bytes.
        seekable = getattr(image_data, "seekable", None)
        if callable(seekable) and seekable():
            image_data.seek(0)
        blob.upload_from_file(image_data, content_type='image/jpeg')
    else:
        # Treat anything else as a file path. Guess the content type from
        # the extension; None (unknown) keeps the library default.
        guessed_type, _ = mimetypes.guess_type(str(image_data))
        with open(image_data, "rb") as image_file:
            blob.upload_from_file(image_file, content_type=guessed_type)

    blob.make_public()
    print("======upload_image_to_gcs=====")
    print(f"File uploaded to {unique_filename} in GCS.")
    return blob.public_url
 def process_image(image_url):
     # 设定一个可写的目录路径
     writable_directory = "/tmp/csv_files"
     if not os.path.exists(writable_directory):
+        os.makedirs(writable_directory)  # 如果目录不存在，创建它
     timestamp = int(time.time())
     file_name = f"csv_{timestamp}.csv"
         print("image_url:", image_url)
     question_count = len(processed_data)
+    result = f"圖片處理完成總共完成 {question_count} 道題目"
     csv_file_path = create_csv(processed_data)
     return processed_data, result, csv_file_path
     return processed_data, result, csv_file_path
+# 新增函數來處理計算紙截圖
def process_calculation_image(image_input, base64_input):
    """Load a scratch-paper image and return it together with its analysis.

    The base64 text takes precedence over the uploaded image when both are
    supplied.

    Args:
        image_input: A file path string, or an object whose ``name``
            attribute is a file path; may be empty/None.
        base64_input: Base64 text for the image, either raw or as a data
            URI (``data:image/...;base64,<payload>``); may be empty/None.

    Returns:
        ``(image, analysis)`` where ``image`` is the opened PIL image and
        ``analysis`` is the text returned by ``analyze_calculation_image``.
        When no input is given, or the input cannot be decoded as an
        image, ``(None, message)`` is returned instead.
    """
    encoded = (base64_input or "").strip()
    try:
        if encoded:
            # Accept both a data URI and raw base64: only strip the header
            # when there is one (indexing split(',')[1] unconditionally
            # raised IndexError on raw base64 input).
            if "," in encoded:
                encoded = encoded.partition(",")[2]
            # Pasted text may be wrapped across lines; drop all whitespace
            # so strict validation only rejects genuinely invalid data.
            payload = "".join(encoded.split())
            image_data = base64.b64decode(payload, validate=True)
            image = Image.open(io.BytesIO(image_data))
        elif image_input:
            # Either a path string or an uploaded-file object with .name.
            source = image_input if isinstance(image_input, str) else image_input.name
            image = Image.open(source)
        else:
            return None, "請上傳圖片或輸入 base64 圖片字串"

        # Re-encode as PNG for the API call. Modes outside this set (e.g.
        # CMYK) are converted to RGB on a copy so PNG encoding does not
        # fail; the image returned for display stays untouched.
        png_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}
        encodable = image if image.mode in png_modes else image.convert("RGB")
        buffered = io.BytesIO()
        encodable.save(buffered, format="PNG")
    except (ValueError, OSError):
        # binascii.Error is a ValueError; unreadable or unidentifiable
        # image files surface as OSError subclasses.
        return None, "無法讀取圖片，請確認圖片檔案或 base64 字串是否正確"

    img_str = base64.b64encode(buffered.getvalue()).decode()
    # API errors are deliberately not caught here; they propagate to the
    # caller exactly as before.
    analysis = analyze_calculation_image(img_str)
    return image, analysis
def analyze_calculation_image(image_base64):
    """Send a base64 PNG of student scratch work to the chat model.

    A single user message is built from the fixed instruction prompt plus
    the image embedded as a ``data:image/png;base64,...`` URL, and sent
    through ``OPEN_AI_CLIENT`` to the ``gpt-4o`` model with a 1000-token
    cap.

    Args:
        image_base64: Base64 text of the PNG image (no data-URI header).

    Returns:
        The message content of the first choice in the response.
    """
    prompt = """
    請分析這張學生計算紙的截圖：
    1. 如果有計算式，請解釋學生的解題步驟，並指出可能的錯誤或改進點。使用 LaTeX 格式表示數學公式。
    2. 如果沒有計算式，請提供這道題目的標準教學步驟，但不要直接給出答案。同樣使用 LaTeX 格式表示數學公式。
    3. 無論哪種情況，都請給出鼓勵性的回饋。
    4. 請使用 Markdown 格式輸出，數學公式請用 $...$ 包裹。
    5. 請使用繁體中文輸出 ZH-TW
    """

    # Assemble the two content parts separately, then the message itself.
    text_part = {"type": "text", "text": prompt}
    image_part = {
        "type": "image_url",
        "image_url": {"url": "data:image/png;base64,{}".format(image_base64)},
    }
    user_message = {"role": "user", "content": [text_part, image_part]}

    completion = OPEN_AI_CLIENT.chat.completions.create(
        model="gpt-4o",
        messages=[user_message],
        max_tokens=1000,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
 # Gradio界面
 with gr.Blocks() as demo:
     with gr.Row():
         password_input = gr.Textbox(label="密碼", type="password", elem_id="password_input")
+    with gr.Tab("計算紙分析"):
+        with gr.Row():
+            gr.Markdown("## 學生計算紙分析")
+        with gr.Accordion(open=False):
+        with gr.Row():
+            calculation_image_input = gr.Image(label="上傳計算紙截圖", type="filepath")
+            with gr.Row():
+                calculation_base64_input = gr.Textbox(label="或輸入 base64 圖片字串", lines=3, elem_id="calculation_base64_input")
+        with gr.Row():
+            calculation_submit_button = gr.Button("分析計算紙", elem_id="calculation_submit_button")
+        with gr.Row():
+            calculation_image_display = gr.Image(label="上傳的圖片")
+        with gr.Row():
+            calculation_result = gr.Markdown(label="分析結果", latex_delimiters=[{"left": "$", "right": "$", "display": False}])
     with gr.Tab("Junyi_Q_ID", elem_id="junyi_q_id_tab"):
         with gr.Row():
             gr.Markdown("## Junyi Q_ID")
         with gr.Accordion(open=False):
             with gr.Row():
                 pdf_result_table = gr.Dataframe(
+                    headers=["圖片URL", "文字", "題號", "題目", "選項1", "選項2", "選項3", "選項4", "答案", "提示1", "提示2", "提示3", "提示4", "提示5", "Perseus JSON"],
                     column_widths=[10, 10, 5, 20, 4, 4, 4, 4, 4,4,4,4,4,4, 10],
                     wrap=True
                 )
     submit_button.click(
         fn=process_image_to_data,
         inputs=[password_input, image_input],
         outputs=[junyi_q_id_question_markdown]
     )
+    calculation_submit_button.click(
+        fn=process_calculation_image,
+        inputs=[calculation_image_input, calculation_base64_input],
+        outputs=[calculation_image_display, calculation_result]
+    )
+demo.launch(share=True)