Spaces:

Corin1998
/

IR_ESG_REPORTER2

Sleeping

App Files Community

Corin1998 committed on Aug 20, 2025

Commit

e3810ec

verified ·

1 Parent(s): a075f76

Upload 19 files

Browse files

Files changed (1) hide show

rag.py +30 -0

rag.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import pandera as pa
 from pandera import Column, DataFrameSchema, Check
 import pandas as pd
 # NOTE(review): presumably the column names required in financial input data;
 # the code that consumes this list is not visible in this view — confirm.
 FIN_REQUIRED = ["year","quarter","revenue","ebit","net_income","total_assets","total_equity"]
 # Column list that validate_esg passes to normalize_columns for ESG input data.
 ESG_REQUIRED = ["year","metric","value","unit","scope","notes"]
@@ -53,3 +55,31 @@ def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
 def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
     """Normalize ``df`` against ``ESG_REQUIRED`` and validate it with ``esg_schema``.

     The frame is first passed through ``normalize_columns`` and the result is
     handed to ``esg_schema.validate`` with ``lazy=True``; whatever that call
     returns is returned to the caller.
     """
     normalized = normalize_columns(df, ESG_REQUIRED)
     validated = esg_schema.validate(normalized, lazy=True)
     return validated

 import pandera as pa
 from pandera import Column, DataFrameSchema, Check
 import pandas as pd
+from pathlib import Path
 # NOTE(review): presumably the column names required in financial input data;
 # the code that consumes this list is not visible in this view — confirm.
 FIN_REQUIRED = ["year","quarter","revenue","ebit","net_income","total_assets","total_equity"]
 # Column list that validate_esg passes to normalize_columns for ESG input data.
 ESG_REQUIRED = ["year","metric","value","unit","scope","notes"]
 def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
     """Normalize ``df`` against ``ESG_REQUIRED`` and validate it with ``esg_schema``.

     The frame is first passed through ``normalize_columns`` and the result is
     handed to ``esg_schema.validate`` with ``lazy=True``; whatever that call
     returns is returned to the caller.
     """
     normalized = normalize_columns(df, ESG_REQUIRED)
     validated = esg_schema.validate(normalized, lazy=True)
     return validated
def build_or_update_index(docs_dir: str, index_path: str = "index.json"):
    """Build a JSON search index from the text files in ``docs_dir`` (dummy version).

    Despite the name, the index is rebuilt from scratch on every call: any
    existing file at ``index_path`` is overwritten rather than merged.

    Args:
        docs_dir: Directory whose top-level ``*.txt`` files are indexed
            (sub-directories are not searched).
        index_path: Destination of the JSON index. Missing parent
            directories are created.

    Returns:
        ``index_path``, unchanged, so the call can be chained.

    Raises:
        UnicodeDecodeError: If a matched file is not valid UTF-8.
    """
    import json

    docs = [
        {"path": str(txt_path), "content": txt_path.read_text(encoding="utf-8")}
        # glob() yields entries in filesystem-dependent order; sort so the index
        # (and therefore "the first document") is the same on every run.
        for txt_path in sorted(Path(docs_dir).glob("*.txt"))
        # A directory can also be named "*.txt"; reading it would crash.
        if txt_path.is_file()
    ]

    index_file = Path(index_path)
    index_file.parent.mkdir(parents=True, exist_ok=True)
    index_file.write_text(
        json.dumps(docs, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    return index_path
def answer_with_context(query: str, index_path: str = "index.json", llm=None) -> str:
    """Answer ``query`` using a document from the index as context (dummy version).

    No real retrieval is performed: the first document stored in the index is
    always used as the context (a real implementation would do vector search
    or similar).

    Args:
        query: The user's question.
        index_path: Path to the JSON index, a list of objects that each carry
            a ``"content"`` entry.
        llm: Optional client object exposing
            ``generate_with_context(query, context)``. When ``None``, a
            placeholder answer is produced instead.

    Returns:
        The LLM's answer, a placeholder answer quoting at most the first 200
        characters of the context, or a message asking the caller to build the
        index first when ``index_path`` does not exist.

    Raises:
        json.JSONDecodeError: If the index file is not valid JSON.
    """
    import json

    index_file = Path(index_path)
    if not index_file.exists():
        return "インデックスが存在しません。まず build_or_update_index を実行してください。"

    docs = json.loads(index_file.read_text(encoding="utf-8"))
    # Placeholder retrieval: simply take the first document.
    context = docs[0]["content"] if docs else "文書がありません。"

    # Compare against None explicitly: a valid client object may define
    # __len__/__bool__ and evaluate as falsy, which must not disable it.
    if llm is not None:
        return llm.generate_with_context(query, context)

    preview_limit = 200
    preview = context[:preview_limit]
    # Only mark the preview as truncated when something was actually cut off.
    if len(context) > preview_limit:
        preview += "..."
    return f"【疑似回答】質問: {query}\n関連情報: {preview}"