Spaces:
Sleeping
Sleeping
還有model 的設定覆蓋
Browse files
app.py
CHANGED
|
@@ -24,13 +24,17 @@ api_base = os.getenv("OPENAI_API_BASE")
|
|
| 24 |
|
| 25 |
# ✅ 合併多檔案文字
|
| 26 |
|
| 27 |
-
def extract_text_from_files(files):
|
| 28 |
from openai import OpenAI
|
| 29 |
import os
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
client = OpenAI(api_key=api_key, base_url=api_base)
|
|
|
|
|
|
|
| 34 |
|
| 35 |
image_exts = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"}
|
| 36 |
merged_text = ""
|
|
@@ -42,7 +46,7 @@ def extract_text_from_files(files):
|
|
| 42 |
# 圖片文件直接使用 AI 處理
|
| 43 |
if ext in image_exts:
|
| 44 |
logger.info(f"使用 AI 處理圖片文件: {filename}")
|
| 45 |
-
md = MarkItDown(llm_client=client, llm_model=
|
| 46 |
result = md.convert(f.name)
|
| 47 |
merged_text += result.text_content + "\n"
|
| 48 |
logger.info(f"圖片文件處理完成: {filename}, 提取文本長度: {len(result.text_content)}")
|
|
@@ -64,7 +68,7 @@ def extract_text_from_files(files):
|
|
| 64 |
else:
|
| 65 |
# 文本太少,可能是掃描版 PDF,使用 AI 處理
|
| 66 |
logger.info(f"普通處理提取文本不足,切換到 AI 處理: {filename}")
|
| 67 |
-
md = MarkItDown(llm_client=client, llm_model=
|
| 68 |
result = md.convert(f.name)
|
| 69 |
merged_text += result.text_content + "\n"
|
| 70 |
logger.info(f"AI 處理完成: {filename}, 提取文本長度: {len(result.text_content)}")
|
|
@@ -81,15 +85,18 @@ def extract_text_from_files(files):
|
|
| 81 |
|
| 82 |
def generate_questions(files, question_types, num_questions, lang, llm_key, baseurl, model=None):
|
| 83 |
try:
|
| 84 |
-
|
| 85 |
-
trimmed_text = text[:200000]
|
| 86 |
-
|
| 87 |
-
# 優先使用 UI 傳入值,否則用 .env
|
| 88 |
key = llm_key if llm_key else os.getenv("OPENAI_API_KEY")
|
| 89 |
base = baseurl if baseurl else os.getenv("OPENAI_API_BASE")
|
| 90 |
-
model_name = model if model else "gpt-4.1"
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
if not key or not base:
|
| 94 |
return {"error": "⚠️ 請輸入 LLM key 與 baseurl"}, ""
|
| 95 |
client = OpenAI(api_key=key, base_url=base)
|
|
@@ -377,7 +384,7 @@ import uvicorn
|
|
| 377 |
|
| 378 |
def build_gradio_blocks():
|
| 379 |
with gr.Blocks() as demo:
|
| 380 |
-
gr.Markdown("# 📄 通用 AI
|
| 381 |
|
| 382 |
with gr.Row():
|
| 383 |
with gr.Column():
|
|
|
|
| 24 |
|
| 25 |
# ✅ 合併多檔案文字
|
| 26 |
|
| 27 |
+
def extract_text_from_files(files, llm_key=None, baseurl=None, model_name=None):
|
| 28 |
from openai import OpenAI
|
| 29 |
import os
|
| 30 |
|
| 31 |
+
# 優先使用 UI 傳入值,否則用 .env,最後才用默認值
|
| 32 |
+
api_key = llm_key if llm_key else os.getenv("OPENAI_API_KEY")
|
| 33 |
+
api_base = baseurl if baseurl else os.getenv("OPENAI_API_BASE")
|
| 34 |
+
model = model_name if model_name else os.getenv("OPENAI_MODEL", "gpt-4.1")
|
| 35 |
client = OpenAI(api_key=api_key, base_url=api_base)
|
| 36 |
+
|
| 37 |
+
logger.info(f"extract_text_from_files 使用的 API 設定 - Base URL: {api_base[:10] if api_base else 'None'}..., Model: {model}")
|
| 38 |
|
| 39 |
image_exts = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"}
|
| 40 |
merged_text = ""
|
|
|
|
| 46 |
# 圖片文件直接使用 AI 處理
|
| 47 |
if ext in image_exts:
|
| 48 |
logger.info(f"使用 AI 處理圖片文件: {filename}")
|
| 49 |
+
md = MarkItDown(llm_client=client, llm_model=model)
|
| 50 |
result = md.convert(f.name)
|
| 51 |
merged_text += result.text_content + "\n"
|
| 52 |
logger.info(f"圖片文件處理完成: {filename}, 提取文本長度: {len(result.text_content)}")
|
|
|
|
| 68 |
else:
|
| 69 |
# 文本太少,可能是掃描版 PDF,使用 AI 處理
|
| 70 |
logger.info(f"普通處理提取文本不足,切換到 AI 處理: {filename}")
|
| 71 |
+
md = MarkItDown(llm_client=client, llm_model=model)
|
| 72 |
result = md.convert(f.name)
|
| 73 |
merged_text += result.text_content + "\n"
|
| 74 |
logger.info(f"AI 處理完成: {filename}, 提取文本長度: {len(result.text_content)}")
|
|
|
|
| 85 |
|
| 86 |
def generate_questions(files, question_types, num_questions, lang, llm_key, baseurl, model=None):
|
| 87 |
try:
|
| 88 |
+
# 優先使用 UI 傳入值,否則用 .env,最後才用默認值
|
|
|
|
|
|
|
|
|
|
| 89 |
key = llm_key if llm_key else os.getenv("OPENAI_API_KEY")
|
| 90 |
base = baseurl if baseurl else os.getenv("OPENAI_API_BASE")
|
| 91 |
+
model_name = model if model else os.getenv("OPENAI_MODEL", "gpt-4.1")
|
| 92 |
+
|
| 93 |
+
logger.info(f"generate_questions 使用的 API 設定 - Base URL: {base[:10] if base else 'None'}..., Model: {model_name}")
|
| 94 |
|
| 95 |
+
# 將 UI 傳入的值傳遞給 extract_text_from_files 函數
|
| 96 |
+
text = extract_text_from_files(files, llm_key=key, baseurl=base, model_name=model_name)
|
| 97 |
+
trimmed_text = text[:200000]
|
| 98 |
+
|
| 99 |
+
# 這裡不需要再次設置 key, base 和 model_name,因為已經在上面設置過了
|
| 100 |
if not key or not base:
|
| 101 |
return {"error": "⚠️ 請輸入 LLM key 與 baseurl"}, ""
|
| 102 |
client = OpenAI(api_key=key, base_url=base)
|
|
|
|
| 384 |
|
| 385 |
def build_gradio_blocks():
|
| 386 |
with gr.Blocks() as demo:
|
| 387 |
+
gr.Markdown("# 📄 通用 AI 出題系統(支援多檔、多語、匯出格式)- DAVID888 ")
|
| 388 |
|
| 389 |
with gr.Row():
|
| 390 |
with gr.Column():
|