Spaces:
Runtime error
Runtime error
Upload 8 files
Browse files- README.md +36 -12
- app.py +110 -0
- config.yaml +31 -0
- guardrails.py +26 -0
- ingest.py +87 -0
- openai_client.py +24 -0
- repository layout +15 -0
- requirements.txt +9 -0
README.md
CHANGED
|
@@ -1,12 +1,36 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
```markdown
|
| 2 |
+
# IR・ESG開示RAGボット(OpenAI API版・8言語対応)
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
## クイックスタート
|
| 6 |
+
1. `data/pdf/` にIR/ESG PDFを配置
|
| 7 |
+
2. `pip install -r requirements.txt`
|
| 8 |
+
3. `python ingest.py` → `data/index/` 生成
|
| 9 |
+
4. `export OPENAI_API_KEY=...`(必要に応じて `OPENAI_BASE_URL`)
|
| 10 |
+
5. `python app.py` → Gradio UI / `/api/answer`
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
## 埋め込みサンプル
|
| 14 |
+
```html
|
| 15 |
+
<script>
|
| 16 |
+
async function askRag(question, lang="ja"){
|
| 17 |
+
const r = await fetch("https://<your-host>/api/answer",{
|
| 18 |
+
method:"POST", headers:{"Content-Type":"application/json"},
|
| 19 |
+
body: JSON.stringify({question, lang})
|
| 20 |
+
});
|
| 21 |
+
const data = await r.json();
|
| 22 |
+
console.log(data.text, data.citations);
|
| 23 |
+
}
|
| 24 |
+
</script>
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
## モデル推奨
|
| 29 |
+
- 生成: `gpt-4o-mini`
|
| 30 |
+
- 埋め込み: `text-embedding-3-large`
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
## 運用Tips
|
| 34 |
+
- PDF直リンク + `#page=<n>` を `meta.json` に保持すれば、根拠クリックで該当ページに飛べます。
|
| 35 |
+
- 年度更新はPDF差替え→`python ingest.py`。CI/CDで自動化を推奨。
|
| 36 |
+
- ログには個人情報を含めない。
|
app.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from__future__import __annotations
|
| 2 |
+
import os, json,yaml
|
| 3 |
+
from typing import List,Dict,Tuple
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import faiss ,numpy as np
|
| 7 |
+
|
| 8 |
+
from openai_client import embed_texts,chat
|
| 9 |
+
from guardrails import detect_out_of_scope,sanitize,copliance_block,SCOPE_HINT
|
| 10 |
+
|
| 11 |
+
CFG=yaml.safe_load(open("config.yaml",encoding="utf-8"))
|
| 12 |
+
EMB_MODEL=CFG["emb_model"]
|
| 13 |
+
TOP_K=CFG["retrieval"]["top_k"]
|
| 14 |
+
SCORE_TH=CFG["retrieval"]["score_threshold"]
|
| 15 |
+
LLM_MODEL=CFG["llm"]["model"]
|
| 16 |
+
LANGS =CFG["languages"]["preferred"]
|
| 17 |
+
LABELS =CFG["languages"].get("labels",{l: l for l in LANGS})
|
| 18 |
+
|
| 19 |
+
INDEX = faiss.read_index("data/index/index.faiss")
|
| 20 |
+
METAS = [json.loads(l) for l in open("data/index/meta.json", encoding="utf-8")]
|
| 21 |
+
|
| 22 |
+
def embed_query(q: str) -> np.ndarray:
    """Embed a single query string and L2-normalize it; returns shape (1, dim)."""
    raw = embed_texts([q], EMB_MODEL)[0]
    vec = np.array(raw, dtype=np.float32)
    norm = np.linalg.norm(vec) + 1e-12  # epsilon avoids division by zero
    return (vec / norm)[None, :]
|
| 26 |
+
|
| 27 |
+
def search(q: str, top_k: int = TOP_K) -> List[Dict]:
    """Retrieve up to ``top_k`` chunks for *q*, deduplicated by (source, page).

    Over-fetches 4x candidates from FAISS, skips hits below the score
    threshold and repeated pages, and returns the first ``top_k`` survivors
    with their scores attached.
    """
    scores, indices = INDEX.search(embed_query(q), top_k * 4)
    results: List[Dict] = []
    visited = set()
    for hit_score, hit_idx in zip(scores[0], indices[0]):
        if hit_score < SCORE_TH:
            continue
        meta = METAS[hit_idx]
        page_key = (meta["source"], meta["page"])
        if page_key in visited:
            continue
        visited.add(page_key)
        results.append({**meta, "score": float(hit_score)})
        if len(results) >= top_k:
            break
    return results
|
| 45 |
+
|
| 46 |
+
def format_context(chunks: List[Dict]) -> str:
    """Render retrieved chunks as bullet lines: source, page, short excerpt."""
    lines = []
    for c in chunks:
        # Backslashes are not allowed inside f-string expressions before
        # Python 3.12, so flatten the newlines outside the f-string.
        excerpt = c["text"][:180].replace("\n", " ")
        lines.append(f"-出典:{c['source']} p.{c['page']} |抜粋: {excerpt}... ")
    return "\n".join(lines)
|
| 50 |
+
|
| 51 |
+
# Output-language instruction appended to the prompt, keyed by language code.
_LANG_INSTRUCTIONS = {
    "ja": "回答は日本語で出力してください",
    "en": "Answer in English.",
    "zh": "请用中文回答。",
    "ko": "한국어로 대답하십시오.",
    "fr": "Répondez en français.",
    "de": "Antworten Sie auf Deutsch.",
    "es": "Responde en español.",
    "it": "Rispondi in italiano.",
}

def generate_answer(q: str, lang: str) -> Tuple[str, str]:
    """Answer *q* in *lang*, grounded in retrieved chunks.

    Returns ``(answer_text, citations_json)``; the second element is a JSON
    object string of the form ``{"citations": [...]}`` so the REST endpoint
    can merge it into its response ("{}" when there is nothing to cite).
    """
    q = (q or "").strip()
    if not q:
        return "質問を入力してください。", "{}"

    if detect_out_of_scope(q):
        return f"{SCOPE_HINT}\nIR/ESG関連の事項についてお尋ねください。", "{}"

    chunks = search(q)
    if not chunks:
        # Without context the model could only hallucinate; refuse instead.
        return "該当する根拠が見つかりませんでした。質問を具体的にしてください。", "{}"
    context = format_context(chunks)

    lang_note = _LANG_INSTRUCTIONS.get(lang, "Answer in the user's preferred language.")
    user_prompt = (
        "以下のコンテキストのみを根拠に、簡潔かつ正確に回答してください。\n"
        "必ず箇条書きで根拠(文書名とページ)を列挙してください。\n"
        # lang_note was previously computed but never inserted in the prompt;
        # the context also belongs *after* its [コンテキスト] header.
        f"{lang_note}\n\n[コンテキスト]\n{context}\n\n[質問]\n{q}"
    )

    messages = [
        {"role": "system", "content": CFG["llm"]["system_prompt"]},
        {"role": "user", "content": user_prompt},
    ]

    text = chat(
        messages,
        model=LLM_MODEL,
        max_output_tokens=CFG["llm"]["max_output_tokens"],
        temperature=CFG["llm"]["temperature"],
    )
    text = sanitize(text) + "\n\n" + compliance_block()
    meta = json.dumps({"citations": chunks}, ensure_ascii=False)
    return text, meta
|
| 88 |
+
|
| 89 |
+
# --- Gradio UI -----------------------------------------------------------
with gr.Blocks(fill_height=True, title=CFG.get("app_name", "RAG Bot")) as demo:
    gr.Markdown("# IR/ESG開示RAG(OpenAI API)-8言語対応")  # space after '#' makes it a heading
    # The first positional argument of gr.Textbox is `value`; the prompt
    # text belongs in `label`, otherwise it pre-fills the input box.
    q = gr.Textbox(
        label="質問 / Question / 質问 / 질문 / Question / Frage / Pregunta / Domanda",
        lines=3,
        placeholder="例:2024年度のGHG排出量(スコープ1-3)は?",
    )
    lang = gr.Dropdown(choices=LANGS, value=LANGS[0], label="回答言語/Output Language")
    ask = gr.Button(
        "回答する / Answer / 回答 / 답변 / Répondre / Antworten / Responder / Rispondere",
        variant="primary",
    )
    ans = gr.Markdown()
    cites = gr.JSON(label="根拠メタデータ/Citations")
    ask.click(fn=generate_answer, inputs=[q, lang], outputs=[ans, cites])

# --- REST API ------------------------------------------------------------
import fastapi

api = fastapi.FastAPI()
app = gr.mount_gradio_app(api, demo, path="/")

@api.post("/api/answer")
async def api_answer(payload: dict):
    """JSON endpoint: {"question", "lang"} -> {"text", "citations"}."""
    text, meta = generate_answer(payload.get("question", ""), payload.get("lang", "ja"))
    data = json.loads(meta)
    return {"text": text, **data}

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
|
| 110 |
+
|
config.yaml
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Configuration for the IR/ESG disclosure RAG bot.
app_name: "IR/ESG RAG Bot(OpenAI,8 languages)"
embedding_model: "text-embedding-3-large"
normalize_embeddings: true
chunk:
  target_chars: 1400
  overlap_chars: 180
retrieval:
  top_k: 6
  score_threshold: 0.15
  mmr_lambda: 0.3
llm:
  # README recommends gpt-4o-mini for generation.
  model: "gpt-4o-mini"
  max_output_tokens: 700
  temperature: 0.2
  system_prompt: |
    あなたは上場企業のIR/ESG開示に特化したRAGアシスタントです。回答は常に根拠(文書名・ページ)を箇条書きで示し、
    文書外の推測や断定は避けます。数値は年度と単位を明記し、最新年度を優先してください。
# Key must be `languages` — app.py reads CFG["languages"].
languages:
  preferred: [ja, en, zh, ko, de, es, it, fr]
  labels:
    ja: "日本語"
    en: "English"
    zh: "中文"
    ko: "한국어"
    de: "Deutsch"
    es: "Español"
    it: "Italiano"
    fr: "Français"
logging:
  save_qa: true
  path: "logs/qa_log.jsonl"
|
guardrails.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from__future__import __annotations
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
ALLOWED_TOPICS =[
|
| 5 |
+
r"IR",r"投資家",r"決算",r"財務",r"ガバナンス",r"統合報告",r"サステナビリティ",
|
| 6 |
+
r"人的資本",r"リスク",r"セグメント",r"株主",r"資本政策",r"ESG",r"GHG"
|
| 7 |
+
]
|
| 8 |
+
OUT_OF_SCOPE_PATTERNS =[r"採用の可否",r"未公開情報",r"株価予想",r"インサイダー",r"個人情報"]
|
| 9 |
+
PII = re.compile(r"(\d{3}-\d{4})|\d{2,4}-\d{4}|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+)")
|
| 10 |
+
|
| 11 |
+
SCOPE_HINT=(
|
| 12 |
+
"このボットはIR/ESG開示文書(統合報告書、サステナ、決算短信、コーポガバ報告)を根拠とするQ&A専用です。"
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
def detect_out_of_scope(q:str)->bool:
|
| 16 |
+
if any(re.search(p,q)for p in OUT_OF_SCOPE_PATTERNS):
|
| 17 |
+
return True
|
| 18 |
+
if not any(re.search(p,q)for p in ALLOWED_TOPICS):
|
| 19 |
+
return True
|
| 20 |
+
return False
|
| 21 |
+
|
| 22 |
+
def sanitize(text:str)->str:
|
| 23 |
+
return PII.sub("[REDACTED]", text)
|
| 24 |
+
|
| 25 |
+
def compliance_block()->str:
|
| 26 |
+
return"※免責:本回答は公開済みIR/ESG資料に基づく情報提供であり、投資判断を目的としません。"
|
ingest.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from__future__import __annotations
|
| 2 |
+
import os,json,pathlib
|
| 3 |
+
from typing import List,Dict,Tuple
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import faiss
|
| 7 |
+
from pypdf import PdfReader
|
| 8 |
+
import yaml
|
| 9 |
+
|
| 10 |
+
from openai_client import embed_texts
|
| 11 |
+
from guardrails import sanitize
|
| 12 |
+
|
| 13 |
+
CFG = yaml.safe_load(open("config.yaml",encoding="utf-8"))
|
| 14 |
+
EMB_MODEL = CFG["embedding_model"]
|
| 15 |
+
NORMALIZE = CFG.get("normalize_embeddings", True)
|
| 16 |
+
|
| 17 |
+
DATA_DIR = pathlib.Path("data")
|
| 18 |
+
PDF_DIR = DATA_DIR / "pdf"
|
| 19 |
+
INDEX_DIR = DATA_DIR / "index"
|
| 20 |
+
META_PATH = INDEX_DIR / "meta.json"
|
| 21 |
+
INDEX_PATH = INDEX_DIR / "faiss.index"
|
| 22 |
+
|
| 23 |
+
def read_pdf_with_pages(path: str) -> List[Tuple[int, str]]:
    """Extract text per page; returns a list of (1-based page number, text)."""
    reader = PdfReader(path)
    result: List[Tuple[int, str]] = []
    for page_no, page in enumerate(reader.pages, start=1):
        raw = page.extract_text() or ""
        # Drop blank lines and surrounding whitespace on each line.
        cleaned = "\n".join(ln.strip() for ln in raw.splitlines() if ln.strip())
        result.append((page_no, cleaned))
    return result
|
| 31 |
+
|
| 32 |
+
def split_chunks(pages: List[Tuple[int, str]], target_chars: int, overlap_chars: int) -> List[Dict]:
    """Split page texts into overlapping chunks of roughly ``target_chars``.

    Chunks shorter than 50 characters (after stripping) are dropped.
    """
    chunks: List[Dict] = []
    for page, text in pages:
        if not text:
            continue
        start = 0
        while start < len(text):
            end = min(len(text), start + target_chars)
            chunk = text[start:end]  # fixed: was `strart` (NameError)
            if len(chunk.strip()) >= 50:
                chunks.append({"page": page, "text": chunk})
            if end == len(text):
                # Reached the end of the page. Stepping back by the overlap
                # here (as the original did) loops forever whenever
                # overlap_chars > 0.
                break
            start = end - overlap_chars if end - overlap_chars > 0 else end
    return chunks
|
| 44 |
+
|
| 45 |
+
def l2_normalize(m: np.ndarray) -> np.ndarray:
    """Row-wise L2 normalization (no-op when normalize_embeddings is false)."""
    if not NORMALIZE:
        return m
    # Epsilon guards against division by zero on an all-zero row; the
    # original `;1e-12` was a dangling no-op statement, never added.
    norms = np.linalg.norm(m, axis=1, keepdims=True) + 1e-12
    return m / norms
|
| 50 |
+
|
| 51 |
+
def build_index():
    """Embed all PDFs under data/pdf/ and write the FAISS index + metadata.

    Writes one JSON object per line to META_PATH while collecting texts,
    then embeds, normalizes, and indexes them.
    """
    INDEX_DIR.mkdir(parents=True, exist_ok=True)

    target_chars = CFG["chunk"]["target_chars"]
    overlap_chars = CFG["chunk"]["overlap_chars"]

    texts = []
    metas = []

    # Context manager guarantees the metadata file is closed even on error.
    with open(META_PATH, "w", encoding="utf-8") as meta_f:
        for pdf in sorted(PDF_DIR.glob("*.pdf")):
            print(f"Processing {pdf.name}...")
            pages = read_pdf_with_pages(pdf)
            chunks = split_chunks(pages, target_chars, overlap_chars)
            for c in chunks:
                t = c["text"][:1800]
                texts.append(t)
                meta = {"source": pdf.name, "page": c["page"], "text": sanitize(t)}
                metas.append(meta)
                meta_f.write(json.dumps(meta, ensure_ascii=False) + "\n")

    if not texts:
        raise SystemExit("Put PDFs under data/pdf/ ")

    vecs = embed_texts(texts, EMB_MODEL)
    mat = np.array(vecs, dtype="float32")
    mat = l2_normalize(mat)

    # With L2-normalized embeddings, inner product == cosine similarity.
    # app.py filters hits with `score < score_threshold`, i.e. it expects
    # higher-is-better scores; IndexFlatL2 returns distances (lower is
    # better) and would invert that filter.
    index = faiss.IndexFlatIP(mat.shape[1])
    index.add(mat)
    faiss.write_index(index, str(INDEX_PATH))
    print(f"Index {len(texts)} chunks → {INDEX_PATH}")

if __name__ == "__main__":
    build_index()
|
openai_client.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from__future__import annoutations
|
| 2 |
+
from typing import List, Dict
|
| 3 |
+
from openai import OpenAI
|
| 4 |
+
|
| 5 |
+
_client = None
|
| 6 |
+
|
| 7 |
+
def client() -> OpenAI:
|
| 8 |
+
global _client
|
| 9 |
+
if _client is None:
|
| 10 |
+
_client = OpenAI()
|
| 11 |
+
return _client
|
| 12 |
+
|
| 13 |
+
def embed_text(text: List[str],model:str) -> List[List[float]]:
|
| 14 |
+
response = client().embeddings.create(model=model,input=texts)
|
| 15 |
+
return [d.embedding for d in response.data]
|
| 16 |
+
|
| 17 |
+
def chat(messages:List[Dict],model:str,max_output_tokens:int =700,temperature:float=0.2) -> str:
|
| 18 |
+
response = client().responses.create(
|
| 19 |
+
model=model,
|
| 20 |
+
messages=messages,
|
| 21 |
+
max_tokens=max_output_tokens,
|
| 22 |
+
temperature=temperature,
|
| 23 |
+
)
|
| 24 |
+
return response.output_text
|
repository layout
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ir-esg-rag-openai-8lang/
|
| 2 |
+
├── app.py # Gradio UI + FastAPI (embed可) — 8言語対応
|
| 3 |
+
├── ingest.py # PDF→チャンク→OpenAI Embeddings→FAISS
|
| 4 |
+
├── guardrails.py # スコープ/PII/免責
|
| 5 |
+
├── openai_client.py # Responses API呼び出し・共通ユーティリティ
|
| 6 |
+
├── config.yaml # モデル/閾値/言語
|
| 7 |
+
├── requirements.txt
|
| 8 |
+
├── README.md
|
| 9 |
+
├── data/
|
| 10 |
+
│ ├── pdf/
|
| 11 |
+
│ └── index/
|
| 12 |
+
│ ├── faiss.index
|
| 13 |
+
│       └── meta.json
|
| 14 |
+
└── logs/
|
| 15 |
+
└── qa_log.jsonl
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai>=1.40.0
|
| 2 |
+
faiss-cpu==1.8.0.post1
|
| 3 |
+
pypdf==4.2.0
|
| 4 |
+
PyYAML==6.0.2
|
| 5 |
+
gradio==4.44.0
|
| 6 |
+
fastapi==0.112.0
|
| 7 |
+
uvicorn==0.30.5
|
| 8 |
+
httpx==0.27.0
|
| 9 |
+
pydantic==2.8.2
|