Corin1998 committed on
Commit
e3810ec
·
verified ·
1 Parent(s): a075f76

Upload 19 files

Browse files
Files changed (1) hide show
  1. rag.py +30 -0
rag.py CHANGED
@@ -1,6 +1,8 @@
1
  import pandera as pa
2
  from pandera import Column, DataFrameSchema, Check
3
  import pandas as pd
 
 
4
 
5
  FIN_REQUIRED = ["year","quarter","revenue","ebit","net_income","total_assets","total_equity"]
6
  ESG_REQUIRED = ["year","metric","value","unit","scope","notes"]
@@ -53,3 +55,31 @@ def validate_financials(df: pd.DataFrame) -> pd.DataFrame:
53
  def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
54
  df = normalize_columns(df, ESG_REQUIRED)
55
  return esg_schema.validate(df, lazy=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandera as pa
2
  from pandera import Column, DataFrameSchema, Check
3
  import pandas as pd
4
+ from pathlib import Path
5
+
6
 
7
  FIN_REQUIRED = ["year","quarter","revenue","ebit","net_income","total_assets","total_equity"]
8
  ESG_REQUIRED = ["year","metric","value","unit","scope","notes"]
 
55
  def validate_esg(df: pd.DataFrame) -> pd.DataFrame:
56
  df = normalize_columns(df, ESG_REQUIRED)
57
  return esg_schema.validate(df, lazy=True)
58
+
59
def build_or_update_index(docs_dir: str, index_path: str = "index.json"):
    """Build a JSON search index from the text files in ``docs_dir`` (dummy version).

    Every regular ``*.txt`` file directly inside ``docs_dir`` (non-recursive)
    is read as UTF-8 and stored as a ``{"path": ..., "content": ...}`` record.
    The index file is rewritten from scratch on every call; nothing is merged
    with a previous index.

    Args:
        docs_dir: Directory scanned for ``*.txt`` documents.
        index_path: Destination of the JSON index file. Missing parent
            directories are created.

    Returns:
        ``index_path`` exactly as it was passed in.
    """
    import json

    # Sort the matches: glob order depends on the filesystem, and a stable
    # order keeps the index (and any "first document" lookup) reproducible.
    # Skip non-files so a directory that happens to end in ".txt" cannot crash
    # the build.
    docs = [
        {"path": str(p), "content": p.read_text(encoding="utf-8")}
        for p in sorted(Path(docs_dir).glob("*.txt"))
        if p.is_file()
    ]

    target = Path(index_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(
        json.dumps(docs, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    return index_path
69
+
70
def answer_with_context(query: str, index_path: str = "index.json", llm=None) -> str:
    """Answer ``query`` using a document from the index as context (dummy version).

    No real retrieval happens here: the first document stored in the index is
    always used as the context (a real implementation would use vector search
    or similar).

    Args:
        query: The user's question.
        index_path: Path of the JSON index file to read.
        llm: Optional object exposing ``generate_with_context(query, context)``.
            When ``None``, a pseudo-answer string is returned instead.

    Returns:
        A message asking the caller to build the index when ``index_path``
        does not exist; otherwise the value returned by
        ``llm.generate_with_context``, or a pseudo-answer quoting at most the
        first 200 characters of the context.
    """
    import json

    index_file = Path(index_path)
    if not index_file.exists():
        return "インデックスが存在しません。まず build_or_update_index を実行してください。"

    docs = json.loads(index_file.read_text(encoding="utf-8"))
    # Simply take the first document (a real version would rank by relevance).
    context = docs[0]["content"] if docs else "文書がありません。"

    # Compare against None explicitly so a supplied LLM object is never skipped
    # just because it happens to evaluate as falsy.
    if llm is not None:
        return llm.generate_with_context(query, context)

    # Only mark the preview with an ellipsis when text was actually cut off.
    preview = context[:200]
    if len(context) > 200:
        preview += "..."
    return f"【疑似回答】質問: {query}\n関連情報: {preview}"