Update app.py

app.py CHANGED
@@ -1,54 +1,47 @@
 # app.py
-import os, glob
+import os, glob, torch
 from langchain.docstore.document import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
 from langchain_huggingface import HuggingFaceEmbeddings
 from docx import Document as DocxDocument
-from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from huggingface_hub import login, snapshot_download
 import gradio as gr

 # -------------------------------
-# 1.
+# 1. Model list
 # -------------------------------
 MODEL_MAP = {
     "Auto": None,
-    "BTLM-3B-8K": "cerebras/btlm-3b-8k-base",
-    "DistilGPT2": "distilgpt2",
-    "BART-Base": "facebook/bart-base"
+    "BTLM-3B-8K": "cerebras/btlm-3b-8k-base",
+    "DistilGPT2": "distilgpt2",
+    "BART-Base": "facebook/bart-base"
 }

-# -------------------------------
-# 2. Hugging Face login
-# -------------------------------
 HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
 if HF_TOKEN:
     login(token=HF_TOKEN)
     print("✅ Logged in to Hugging Face with HUGGINGFACEHUB_API_TOKEN")
-else:
-    print("⚠️ No HUGGINGFACEHUB_API_TOKEN; download speed may be limited")

 # -------------------------------
-#
+# 2. Pre-download models
 # -------------------------------
 LOCAL_MODEL_DIRS = {}
 for name, repo in MODEL_MAP.items():
-    if repo is None:
+    if repo is None:
         continue
     try:
         local_dir = f"./models/{repo.split('/')[-1]}"
         if not os.path.exists(local_dir):
             print(f"⬇️ Downloading model {repo} ...")
             snapshot_download(repo_id=repo, token=HF_TOKEN, local_dir=local_dir)
-        else:
-            print(f"✅ Model {repo} already present -> {local_dir}")
         LOCAL_MODEL_DIRS[name] = local_dir
     except Exception as e:
         print(f"⚠️ Could not download model {repo}: {e}")

 # -------------------------------
-#
+# 3. Pipeline loading
 # -------------------------------
 _loaded_pipelines = {}

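A note on the pre-download hunk above: snapshot_download() pulls every file of a Hub repo into local_dir once, so later from_pretrained() calls read from disk instead of the network, and passing a token lifts anonymous rate limits. A minimal, self-contained sketch of the same pattern (ensure_local_model is an illustrative name, not a function in app.py):

    # Sketch of the caching pattern used above; ensure_local_model() is
    # an illustrative helper name, not something defined in app.py.
    import os
    from huggingface_hub import snapshot_download

    def ensure_local_model(repo_id, token=None):
        """Download a Hub repo once, then reuse the local copy."""
        local_dir = f"./models/{repo_id.split('/')[-1]}"
        if not os.path.exists(local_dir):
            # A token is optional here; authenticated requests get higher
            # rate limits, which is why app.py passes HF_TOKEN.
            snapshot_download(repo_id=repo_id, token=token, local_dir=local_dir)
        return local_dir

    # Example: caches to ./models/distilgpt2 on the first call only.
    print(ensure_local_model("distilgpt2"))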
@@ -56,26 +49,38 @@ def get_pipeline(model_name):
     if model_name not in _loaded_pipelines:
         local_path = LOCAL_MODEL_DIRS.get(model_name)
         print(f"🔄 Loading model {model_name} from {local_path}")
+
+        if model_name == "BTLM-3B-8K":
+            tokenizer = AutoTokenizer.from_pretrained(local_path, trust_remote_code=True)
+            model = AutoModelForCausalLM.from_pretrained(local_path, trust_remote_code=True)
+        else:
+            tokenizer = AutoTokenizer.from_pretrained(local_path)
+            model = AutoModelForCausalLM.from_pretrained(local_path)
+
         generator = pipeline(
             "text-generation",
-            model=
-            tokenizer=
-
-            trust_remote_code=True  # <<<< add this to run BTLM
+            model=model,
+            tokenizer=tokenizer,
+            device=-1  # force CPU
         )
         _loaded_pipelines[model_name] = generator
     return _loaded_pipelines[model_name]

-def call_local_inference(model_name, prompt, max_new_tokens=
+def call_local_inference(model_name, prompt, max_new_tokens=256):
     try:
         generator = get_pipeline(model_name)
-        outputs = generator(
+        outputs = generator(
+            prompt,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            temperature=0.7
+        )
         return outputs[0]["generated_text"]
     except Exception as e:
         return f"(Generation failed: {e})"

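The rewritten get_pipeline() builds the pipeline from explicit AutoTokenizer/AutoModelForCausalLM objects and caches one pipeline per model; trust_remote_code=True is needed only for BTLM, since cerebras/btlm-3b-8k-base ships custom modeling code, and device=-1 pins inference to CPU. A runnable sketch of the same lazy-load-and-cache pattern, using DistilGPT2 to keep it small (the helper name and cache dict are illustrative):

    # Sketch of the lazy-load-and-cache pattern from get_pipeline();
    # load_generator() is an illustrative name, not part of app.py.
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

    _cache = {}

    def load_generator(path, remote_code=False):
        if path not in _cache:
            tok = AutoTokenizer.from_pretrained(path, trust_remote_code=remote_code)
            mdl = AutoModelForCausalLM.from_pretrained(path, trust_remote_code=remote_code)
            _cache[path] = pipeline(
                "text-generation",
                model=mdl,
                tokenizer=tok,
                device=-1,  # -1 keeps the pipeline on CPU
            )
        return _cache[path]

    # A Hub id works the same way as a local snapshot directory.
    gen = load_generator("distilgpt2")
    out = gen("Hello", max_new_tokens=20, do_sample=True, temperature=0.7)
    print(out[0]["generated_text"])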
 # -------------------------------
-#
+# 4. Auto mode
 # -------------------------------
 def pick_model_auto(segments):
     if segments <= 3:
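The diff cuts pick_model_auto() off after its first branch, so the real dispatch rules for the "Auto" entry in MODEL_MAP are not visible here. Purely to illustrate the idea, a hypothetical version might look like the sketch below; the thresholds and model choices are guesses, not the app's actual logic.

    # Hypothetical only: the real body is truncated in the diff above.
    def pick_model_auto(segments):
        if segments <= 3:
            return "DistilGPT2"   # guess: few segments -> smallest model
        return "BTLM-3B-8K"       # guess: longer jobs -> larger model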
@@ -107,7 +112,7 @@ def generate_article_progress(query, model_name, segments=5):
     yield "\n\n".join(all_text), docx_file, f"Model used this run: {selected_model}"

 # -------------------------------
-#
+# 5. Gradio interface
 # -------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# Buddhist Sutra & Treatise RAG System (Free CPU Edition)")
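generate_article_progress() is a generator (note the yield above), and Gradio runs generator callbacks as streaming updates: every yield pushes fresh values to the bound output components. A minimal self-contained sketch of that wiring, with illustrative component names rather than the app's real layout:

    # Sketch of streaming a generator into gr.Blocks; names are illustrative.
    import gradio as gr

    def stream_demo(query):
        parts = []
        for i in range(3):
            parts.append(f"Paragraph {i + 1} about {query}")
            yield "\n\n".join(parts)  # each yield re-renders the output box

    with gr.Blocks() as demo:
        inp = gr.Textbox(label="Query")
        out = gr.Textbox(label="Draft")
        gr.Button("Generate").click(stream_demo, inputs=inp, outputs=out)

    demo.launch()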