Spaces:
Sleeping
Sleeping
Upload 19 files
Browse files
rag.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
import pandera as pa
|
| 2 |
from pandera import Column, DataFrameSchema, Check
|
| 3 |
import pandas as pd
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# Column names required in a financials table.
# NOTE(review): presumably consumed by validate_financials (body not visible
# here) — confirm against that function.
FIN_REQUIRED = [
    "year",
    "quarter",
    "revenue",
    "ebit",
    "net_income",
    "total_assets",
    "total_equity",
]
|
| 6 |
# Column names required in an ESG table; validate_esg passes this list to
# normalize_columns before schema validation.
ESG_REQUIRED = [
    "year",
    "metric",
    "value",
    "unit",
    "scope",
    "notes",
]
|
|
@@ -53,3 +55,31 @@ def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 53 |
def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize the columns of an ESG frame and validate it with ``esg_schema``.

    The frame is first passed through ``normalize_columns`` together with
    ``ESG_REQUIRED``; the result is then validated with ``lazy=True``.

    NOTE(review): assuming ``esg_schema`` is a pandera ``DataFrameSchema``,
    lazy validation collects every failure and raises them together as
    ``pandera.errors.SchemaErrors`` — confirm against the schema definition.

    Args:
        df: Raw ESG data.

    Returns:
        Whatever ``esg_schema.validate`` returns for the normalized frame.
    """
    normalized = normalize_columns(df, ESG_REQUIRED)
    validated = esg_schema.validate(normalized, lazy=True)
    return validated
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pandera as pa
|
| 2 |
from pandera import Column, DataFrameSchema, Check
|
| 3 |
import pandas as pd
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
|
| 7 |
# Column names required in a financials table.
# NOTE(review): presumably consumed by validate_financials (body not visible
# here) — confirm against that function.
FIN_REQUIRED = [
    "year",
    "quarter",
    "revenue",
    "ebit",
    "net_income",
    "total_assets",
    "total_equity",
]
|
| 8 |
# Column names required in an ESG table; validate_esg passes this list to
# normalize_columns before schema validation.
ESG_REQUIRED = [
    "year",
    "metric",
    "value",
    "unit",
    "scope",
    "notes",
]
|
|
|
|
| 55 |
def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize the columns of an ESG frame and validate it with ``esg_schema``.

    The frame is first passed through ``normalize_columns`` together with
    ``ESG_REQUIRED``; the result is then validated with ``lazy=True``.

    NOTE(review): assuming ``esg_schema`` is a pandera ``DataFrameSchema``,
    lazy validation collects every failure and raises them together as
    ``pandera.errors.SchemaErrors`` — confirm against the schema definition.

    Args:
        df: Raw ESG data.

    Returns:
        Whatever ``esg_schema.validate`` returns for the normalized frame.
    """
    normalized = normalize_columns(df, ESG_REQUIRED)
    validated = esg_schema.validate(normalized, lazy=True)
    return validated
|
| 58 |
+
|
| 59 |
+
def build_or_update_index(docs_dir: str, index_path: str = "index.json"):
    """Build a simple JSON search index from the text files in *docs_dir*.

    This is a placeholder ("dummy") implementation: every regular file
    directly inside ``docs_dir`` whose name matches ``*.txt`` is read as
    UTF-8 and stored as ``{"path": ..., "content": ...}``. The list of
    entries is written to ``index_path`` as pretty-printed UTF-8 JSON,
    replacing any existing file.

    Args:
        docs_dir: Directory to scan (non-recursive).
        index_path: Destination of the JSON index. Missing parent
            directories are created.

    Returns:
        ``index_path`` exactly as it was passed in.

    Raises:
        UnicodeDecodeError: If a matched file is not valid UTF-8.
        OSError: If a file cannot be read or the index cannot be written.
    """
    import json

    # glob() yields entries in arbitrary filesystem order, and
    # answer_with_context() uses the first entry of the index, so sort to keep
    # the index (and the answers derived from it) reproducible. Directories
    # that happen to be named "*.txt" are skipped instead of crashing read_text.
    text_files = sorted(p for p in Path(docs_dir).glob("*.txt") if p.is_file())
    docs = [
        {"path": str(p), "content": p.read_text(encoding="utf-8")}
        for p in text_files
    ]

    target = Path(index_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(
        json.dumps(docs, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    return index_path
|
| 69 |
+
|
| 70 |
+
def answer_with_context(query: str, index_path: str = "index.json", llm=None) -> str:
    """Answer *query* using context taken from the JSON index (dummy version).

    Retrieval is a placeholder: the content of the first indexed document is
    always used as the context. A real implementation would rank documents,
    e.g. with a vector search.

    Args:
        query: The user's question.
        index_path: Path of the JSON index written by
            ``build_or_update_index``.
        llm: Optional object exposing ``generate_with_context(query, context)``.
            When ``None``, a pseudo-answer string is returned instead.

    Returns:
        A fixed (Japanese) message when the index file does not exist;
        otherwise the LLM's result, or a pseudo-answer containing the query
        and at most the first 200 characters of the context.

    Raises:
        json.JSONDecodeError: If the index file is not valid JSON.
    """
    import json

    # Read first and handle absence afterwards: avoids the check-then-read
    # race of a separate exists() call. NotADirectoryError covers paths whose
    # parent component is a regular file, which exists() also reports as
    # "missing".
    try:
        raw_index = Path(index_path).read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return "インデックスが存在しません。まず build_or_update_index を実行してください。"

    docs = json.loads(raw_index)
    # Simply take the first document (should become a real relevance search).
    context = docs[0]["content"] if docs else "文書がありません。"

    # Compare against None explicitly: an LLM wrapper that defines
    # __len__/__bool__ may be falsy and must still be used.
    if llm is not None:
        return llm.generate_with_context(query, context)

    # Only add the ellipsis when the context was actually cut off.
    preview = context[:200]
    if len(context) > 200:
        preview += "..."
    return f"【疑似回答】質問: {query}\n関連情報: {preview}"
|