Spaces:
Sleeping
Sleeping
Commit ·
a9e6507
1
Parent(s): 6690ca7
feat : v1
Browse files- .env.example +1 -0
- .gitignore +37 -0
- Dockerfile +36 -0
- app.py +54 -0
- data/recipes.jsonl +2 -0
- db/chroma_init.py +35 -0
- models/model_wrapper.py +10 -0
- rag.py +122 -0
- requirements.txt +13 -0
.env.example
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
GOOGLE_API_KEY=your_google_ai_api_key
|
.gitignore
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 1. 虛擬環境目錄
|
| 2 |
+
.venv
|
| 3 |
+
venv/
|
| 4 |
+
env/
|
| 5 |
+
/site-packages
|
| 6 |
+
|
| 7 |
+
.env
|
| 8 |
+
|
| 9 |
+
# 2. Python 編譯快取
|
| 10 |
+
__pycache__/*
|
| 11 |
+
*.pyc
|
| 12 |
+
*.pyd
|
| 13 |
+
*.so
|
| 14 |
+
|
| 15 |
+
# 3. 測試和文件
|
| 16 |
+
.pytest_cache/
|
| 17 |
+
htmlcov/
|
| 18 |
+
.coverage
|
| 19 |
+
|
| 20 |
+
# 4. 編輯器/IDE 檔案 (可選,依據您使用的工具)
|
| 21 |
+
# PyCharm/IntelliJ 專案檔案
.idea/
|
| 22 |
+
.vscode/
|
| 23 |
+
# VS Code 設定 (如果不想共享)
.vscode/*
|
| 24 |
+
# Vim 臨時檔案
*.swp
|
| 25 |
+
|
| 26 |
+
# 5. 您的靜態/媒體檔案 (保持原樣,但使用更精確的模式)
|
| 27 |
+
# 忽略整個 static 資料夾
/static/
|
| 28 |
+
# 日誌檔案
/logs/
|
| 29 |
+
*.log
|
| 30 |
+
# 如果使用 SQLite 資料庫
*.sqlite3
|
| 31 |
+
|
| 32 |
+
# 6. db
|
| 33 |
+
/chroma_db/*
|
| 34 |
+
|
| 35 |
+
*.pth
|
| 36 |
+
*.pt
|
| 37 |
+
*.ckpt
|
Dockerfile
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================
|
| 2 |
+
# Base
|
| 3 |
+
# ============================================
|
| 4 |
+
FROM python:3.10-slim
|
| 5 |
+
|
| 6 |
+
# ============================================
|
| 7 |
+
# System-level deps
|
| 8 |
+
# ============================================
|
| 9 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 10 |
+
build-essential \
|
| 11 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
+
|
| 13 |
+
# ============================================
|
| 14 |
+
# Working directory
|
| 15 |
+
# ============================================
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# ============================================
|
| 19 |
+
# Install Python dependencies
|
| 20 |
+
# --------------------------------------------
|
| 21 |
+
# COPY requirements first for better caching
|
| 22 |
+
# ============================================
|
| 23 |
+
COPY requirements.txt .
|
| 24 |
+
|
| 25 |
+
RUN pip install --upgrade pip && \
|
| 26 |
+
pip install --no-cache-dir -r requirements.txt
|
| 27 |
+
|
| 28 |
+
# ============================================
|
| 29 |
+
# Copy application source code
|
| 30 |
+
# ============================================
|
| 31 |
+
COPY . .
|
| 32 |
+
|
| 33 |
+
# ============================================
|
| 34 |
+
# Run FastAPI server
|
| 35 |
+
# ============================================
|
| 36 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
import os
|
| 3 |
+
import uvicorn
|
| 4 |
+
from fastapi import FastAPI, HTTPException
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
from rag import build_rag_chain
|
| 8 |
+
from db.chroma_init import load_recipes_to_chroma
|
| 9 |
+
|
| 10 |
+
load_dotenv()

# FastAPI application instance served by uvicorn as "app:app".
app = FastAPI(
    title="Recipe RAG API (Gemini 2.0)",
    description="使用 FastAPI + LangChain + Chroma + HuggingFace Embeddings 的服務",
    version="1.0.0",
)

# Seed the vector store at startup when the Chroma directory is missing or
# empty. Comment this section out if seeding should happen elsewhere.
CHROMA_DIR = "./chroma_db"
SAMPLE_DATA_PATH = "./data/recipes.jsonl"

if not os.path.exists(CHROMA_DIR) or not os.listdir(CHROMA_DIR):
    try:
        print("load : chroma init")
        # load_recipes_to_chroma needs the texts to index; each non-blank
        # JSONL line (one recipe record) is passed as one document.
        with open(SAMPLE_DATA_PATH, encoding="utf-8") as sample_file:
            sample_texts = [line.strip() for line in sample_file if line.strip()]
        load_recipes_to_chroma(sample_texts)
    except Exception as e:
        # Seeding is best-effort: report the failure and keep starting up.
        print("Warning: chroma init failed:", e)

# Build the RAG callable once so every request shares it.
RAG_CHAIN = build_rag_chain(k=4)
|
| 28 |
+
|
| 29 |
+
# Request body for POST /recipe.
class RecipeRequest(BaseModel):
    # Free-text description of what the user wants to cook.
    query: str
|
| 31 |
+
|
| 32 |
+
# Index route: returns a fixed readiness message.
@app.get("/")
def root():
    payload = {"message": "COOK BOOK API ready!"}
    return payload
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@app.post("/recipe")
def generate_recipe(req: RecipeRequest):
    """Generate recipes for the given query through the shared RAG chain.

    Returns a dict with the original ``query``, the generated ``recipe``
    payload and the retrieved ``sources`` (content plus metadata).

    Raises:
        HTTPException: 400 when the query is empty or only whitespace;
            500 when calling the chain or building the response fails.
    """
    if not req.query or not req.query.strip():
        raise HTTPException(status_code=400, detail="query 必填")
    try:
        output = RAG_CHAIN(req.query)

        # Fall back only when a key is really absent (None). An empty list
        # is a valid result and must not be replaced by str(output).
        result_text = output.get("result")
        if result_text is None:
            result_text = output.get("output_text")
        if result_text is None:
            result_text = str(output)

        sources = [
            {"content": d.page_content, "meta": getattr(d, "metadata", {})}
            for d in output.get("source_documents", [])
        ]
        return {"query": req.query, "recipe": result_text, "sources": sources}
    except Exception as e:
        # Keep the original error as the cause so it shows in tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 52 |
+
|
| 53 |
+
if __name__ == "__main__":
|
| 54 |
+
uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
|
data/recipes.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"title":"番茄炒蛋","ingredients":"番茄、雞蛋、蔥、鹽、糖、油","steps":"1. 番茄切塊 2. 打蛋 3. 炒蛋 4. 加番茄同炒至熟 5. 調味"}
|
| 2 |
+
{"title":"三杯雞","ingredients":"雞腿、米酒、醬油、麻油、糖、九層塔","steps":"1. 雞腿切塊 2. 爆香 3. 加調味料悶煮 4. 收汁 5. 起鍋加九層塔"}
|
db/chroma_init.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# db/chroma_init.py
|
| 2 |
+
from langchain_chroma import Chroma
|
| 3 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 4 |
+
|
| 5 |
+
CHROMA_DIR = "./chroma_db"
|
| 6 |
+
MODEL_NAME="sentence-transformers/all-MiniLM-L6-v2"
|
| 7 |
+
|
| 8 |
+
def load_recipes_to_chroma(texts=None, data_path="./data/recipes.jsonl"):
    """Embed recipe texts and store them in the Chroma directory.

    Args:
        texts: Strings to index. When None, every non-blank line of
            ``data_path`` is used as one document, so the function can be
            called without arguments.
        data_path: JSONL file read only when ``texts`` is None.

    Returns:
        The Chroma vector store created from the texts.

    Raises:
        OSError: If ``texts`` is None and ``data_path`` cannot be read.
    """
    if texts is None:
        with open(data_path, encoding="utf-8") as data_file:
            texts = [line.strip() for line in data_file if line.strip()]

    embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)

    db = Chroma.from_texts(
        texts=texts,
        embedding=embeddings,
        persist_directory=CHROMA_DIR,
    )

    # persist() is not available on every Chroma wrapper version (stores
    # created with persist_directory are written automatically there), so
    # call it only when the method exists.
    persist = getattr(db, "persist", None)
    if callable(persist):
        persist()
    return db
|
| 24 |
+
|
| 25 |
+
# --- 建立 Chroma DB ---
|
| 26 |
+
def get_vectordb():
    """Open the Chroma store in CHROMA_DIR using the MODEL_NAME embeddings."""
    return Chroma(
        persist_directory=CHROMA_DIR,
        embedding_function=HuggingFaceEmbeddings(model_name=MODEL_NAME),
    )
|
models/model_wrapper.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 4 |
+
|
| 5 |
+
GENAI_MODEL = "gemini-2.0-flash"
|
| 6 |
+
|
| 7 |
+
def get_llm():
    """Return a LangChain chat model bound to the GENAI_MODEL Gemini model."""
    return ChatGoogleGenerativeAI(model=GENAI_MODEL)
|
rag.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# rag.py
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
# 新增:用於定義結構化輸出格式
|
| 6 |
+
from typing import List, Dict, Any
|
| 7 |
+
from pydantic import BaseModel, Field
|
| 8 |
+
|
| 9 |
+
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
|
| 10 |
+
from langchain_core.prompts import PromptTemplate # 確保導入這個,用於 HumanMessage 的子模板
|
| 11 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 12 |
+
from models.model_wrapper import get_llm
|
| 13 |
+
from db.chroma_init import get_vectordb
|
| 14 |
+
from langchain_core.exceptions import OutputParserException
|
| 15 |
+
|
| 16 |
+
# --- 🎯 食譜 Pydantic 結構定義 ---
|
| 17 |
+
# One ingredient line of a recipe: a name plus a free-form amount string.
# Field descriptions are schema text and are intentionally left as written.
class Ingredient(BaseModel):
    # Ingredient name.
    name: str = Field(description="材料名稱,例如:豬五花肉")
    # Quantity including its unit, kept as text.
    amount: str = Field(description="份量/數量,例如:300克 或 2大匙")
|
| 20 |
+
|
| 21 |
+
# Structured form of one complete recipe.
# NOTE(review): the docstring and Field descriptions are left untranslated
# because they are part of the model schema, which presumably reaches the LLM
# through with_structured_output — confirm before editing them.
class Recipe(BaseModel):
    """用於儲存完整食譜的 JSON 結構"""
    # Recipe name.
    title: str = Field(description="食譜的繁體中文名稱")
    # Every ingredient with its amount.
    ingredients: List[Ingredient] = Field(description="所有材料的清單")
    # Preparation steps, one string per step.
    steps: List[str] = Field(description="詳細的步驟說明")
    # Extra reminders or substitute-ingredient suggestions.
    notes: List[str] = Field(description="食譜的額外提醒或替代食材建議")
|
| 27 |
+
|
| 28 |
+
# 定義一個包含多個 Recipe 的容器
|
| 29 |
+
# Container holding several Recipe objects; used as the structured-output
# schema in build_rag_chain.
# NOTE(review): docstring and description are left untranslated because they
# are part of the model schema — confirm before editing them.
class RecipeList(BaseModel):
    """用於儲存多個完整食譜的列表結構"""
    # The generated recipes.
    recipes: List[Recipe] = Field(
        description="一個包含多個食譜物件(Recipe)的列表。"
    )
|
| 34 |
+
# -----------------------------------
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# --- 將 PROMPT_TMPL 內容拆分 ---
|
| 38 |
+
# 1. 系統提示 (System Prompt) - 放置角色、格式和主要限制
|
| 39 |
+
SYS_TMPL = """
|
| 40 |
+
你是一位專業中文料理師傅。
|
| 41 |
+
|
| 42 |
+
請生成一個完整、易懂的繁體中文菜譜。
|
| 43 |
+
請嚴格以 **純 JSON 格式** 輸出,且內容必須符合指定的 Schema。
|
| 44 |
+
|
| 45 |
+
以下是輸出限制:
|
| 46 |
+
- 優先使用資料庫中的資訊
|
| 47 |
+
- 禁止憑空編造不存在的材料
|
| 48 |
+
- 若必要,請在 notes 加上替代食材建議
|
| 49 |
+
"""
|
| 50 |
+
|
| 51 |
+
# 2. 用戶/輸入提示 (Human Prompt) - 放置變量輸入
|
| 52 |
+
HUMAN_TMPL = """
|
| 53 |
+
使用者需求:{query}
|
| 54 |
+
|
| 55 |
+
請根據需求和資料庫上下文,生成**兩個**不同的食譜。
|
| 56 |
+
|
| 57 |
+
資料庫上下文:
|
| 58 |
+
{context}
|
| 59 |
+
"""
|
| 60 |
+
|
| 61 |
+
# --- 建立 ChatPromptTemplate ---
|
| 62 |
+
# A. 建立 System Message Template
|
| 63 |
+
# Human turn: HUMAN_TMPL wrapped in a PromptTemplate that declares its two
# input variables.
human_message_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        template=HUMAN_TMPL,
        input_variables=["query", "context"],
    )
)

# System turn: built directly from SYS_TMPL.
system_message_prompt = SystemMessagePromptTemplate.from_template(SYS_TMPL)

# Full chat prompt: system message first, then the human message.
base_prompt = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# --- 建立 RAG function ---
|
| 81 |
+
def build_rag_chain(k=4):
    """Build a callable that answers a recipe query with retrieval + LLM.

    Args:
        k: Number of documents requested from the retriever per query.

    Returns:
        A function ``rag(query)`` that returns a dict with two keys:
        ``"result"`` is the list of recipe dicts, or an ``{"error": ...}``
        dict when any step fails; ``"source_documents"`` is the list of
        retrieved documents (empty when retrieval itself failed).
    """
    db = get_vectordb()
    llm = get_llm()

    retriever = db.as_retriever(search_kwargs={"k": k})
    structured_llm = llm.with_structured_output(RecipeList)

    def get_context_and_query(query: str):
        """Retrieve documents and join their contents into one context string."""
        docs = retriever.invoke(query)
        context = "\n".join(d.page_content for d in docs)
        return {"context": context, "query": query, "docs": docs}

    def rag(query: str):
        """Run retrieval, prompt construction and the structured LLM call."""
        # Bound before the try so both handlers can return it even when
        # retrieval raises before any documents were fetched.
        docs = []
        try:
            # 1. Retrieve outside the prompt so the documents can be returned.
            input_data = get_context_and_query(query)
            docs = input_data.pop("docs")

            # 2. Fill the chat prompt with the query and retrieved context.
            prompt_value = base_prompt.invoke(input_data)

            # 3. Call the LLM; the answer is parsed into a RecipeList.
            answer = structured_llm.invoke(prompt_value)
            # Prefer pydantic v2's model_dump(); fall back to dict() on v1.
            if hasattr(answer, "model_dump"):
                result_dict = answer.model_dump()
            else:
                result_dict = answer.dict()
            final_list = result_dict.get("recipes", [])

            return {"result": final_list, "source_documents": docs}

        except OutputParserException:
            return {"result": {"error": "LLM 輸出格式錯誤,無法解析 JSON"}, "source_documents": docs}

        except Exception as e:
            return {"result": {"error": f"LLM 呼叫失敗: {e}"}, "source_documents": docs}

    return rag
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
langchain
|
| 4 |
+
langchain-core
|
| 5 |
+
langchain-community
|
| 6 |
+
langchain-google-genai
|
| 7 |
+
langchain-huggingface
|
| 8 |
+
langchain-chroma
|
| 9 |
+
chromadb
|
| 10 |
+
sentence-transformers
|
| 11 |
+
huggingface-hub
|
| 12 |
+
python-dotenv
|
| 13 |
+
pydantic
|