File size: 8,224 Bytes
b66ca20
3e53e23
be1258a
ec337b7
4dc1bf6
 
 
ec337b7
 
207f52b
be1258a
207f52b
2dd13f2
207f52b
4dc1bf6
2dd13f2
 
 
 
 
4dc1bf6
 
3e53e23
 
460c48d
 
 
be1258a
460c48d
be1258a
460c48d
 
 
 
 
 
 
 
 
3e53e23
 
 
 
 
460c48d
 
f65a047
 
3e53e23
460c48d
3e53e23
460c48d
3e53e23
 
 
be1258a
 
5e95e05
f65a047
460c48d
f65a047
5e95e05
3e53e23
5e95e05
 
4dc1bf6
b66ca20
4dc1bf6
 
3e53e23
 
4dc1bf6
 
be1258a
4dc1bf6
 
 
3e53e23
 
 
 
 
 
 
 
 
 
 
 
 
207f52b
 
 
d5e7dc0
ec337b7
 
 
 
58d924b
 
207f52b
f65a047
 
 
 
 
 
4dc1bf6
 
 
ec337b7
 
460c48d
 
f65a047
 
 
ec337b7
f65a047
 
5e95e05
f65a047
ec337b7
f65a047
 
 
 
460c48d
f65a047
460c48d
 
 
58d924b
 
 
f65a047
 
 
 
 
 
58d924b
 
 
f65a047
 
ec337b7
f65a047
460c48d
f65a047
ec337b7
58d924b
 
4dc1bf6
3e53e23
 
207f52b
 
2dd13f2
 
 
 
 
 
 
 
 
 
ec337b7
 
 
 
207f52b
ec337b7
 
 
 
 
 
 
 
 
 
 
5e95e05
 
 
 
ec337b7
5e95e05
 
 
 
 
 
 
ec337b7
 
5e95e05
 
be1258a
 
ec337b7
 
58d924b
 
4dc1bf6
 
 
ec337b7
4dc1bf6
 
1b4de71
be1258a
1b4de71
be1258a
f65a047
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# irpr/main.py
from __future__ import annotations
from fastapi import FastAPI, UploadFile, File, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from fastapi.templating import Jinja2Templates
import os, traceback

from irpr.models import IngestRequest, GenerateRequest
from irpr.config import settings

# FastAPI application object; every route, mount and handler in this module
# is registered on it.
app = FastAPI(title="IR/PR Co-Pilot Pro", version="0.4.5 (OpenAI)")

# CORS is configured fully open: any origin, method and header is accepted,
# and credentials are allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ===== Directory selection (same strategy as deps) =====

def _ensure_dir_writable(path: str) -> bool:
    """Create *path* if needed and report whether a file can be written in it.

    The check is done for real: a small probe file named ``.write_test`` is
    written into the directory and removed again. Any failure along the way
    (creation, write, or removal) yields ``False`` instead of raising.
    """
    # Stage 1: the directory must exist (or be creatable).
    try:
        os.makedirs(path, exist_ok=True)
    except Exception:
        return False

    # Stage 2: loosen permissions on a best-effort basis; a refusal here is
    # deliberately ignored because the probe below is the real test.
    try:
        os.chmod(path, 0o777)
    except Exception:
        pass

    # Stage 3: prove writability by round-tripping a probe file.
    try:
        probe_path = os.path.join(path, ".write_test")
        with open(probe_path, "wb") as probe:
            probe.write(b"ok")
        os.remove(probe_path)
    except Exception:
        return False
    return True

def _ensure_dir_tree(base: str, sub: str = "simple_index") -> bool:
    """Return True only when both *base* and ``base/sub`` are writable.

    The sub-directory is not touched at all when *base* itself fails the
    writability check.
    """
    base_ok = _ensure_dir_writable(base)
    return base_ok and _ensure_dir_writable(os.path.join(base, sub))

def _pick_writable_dir() -> str:
    """Choose the first usable storage root and return its path.

    Candidates are tried in order: ``settings.DATA_DIR`` (when set),
    ``/tmp/irpr``, ``/mnt/data`` and ``<cwd>/data``. A candidate is usable
    when both it and its ``simple_index`` sub-directory are writable. If none
    qualifies, ``/tmp/irpr`` is attempted once more and returned regardless.
    """
    last_resort = "/tmp/irpr"

    configured = settings.DATA_DIR
    search_order = [configured] if configured else []
    search_order.extend([last_resort, "/mnt/data", os.path.join(os.getcwd(), "data")])

    chosen = next(
        (root for root in search_order if _ensure_dir_tree(root, "simple_index")),
        None,
    )
    if chosen is not None:
        return chosen

    # Nothing passed the check: make one more attempt at the default location
    # and hand it back even if that attempt fails too.
    _ensure_dir_tree(last_resort, "simple_index")
    return last_resort

# Resolve the storage root once at import time; uploads and the index live in
# fixed sub-directories beneath it.
BASE_DIR = _pick_writable_dir()
UPLOAD_DIR = os.path.join(BASE_DIR, "uploads")
INDEX_DIR  = os.path.join(BASE_DIR, "simple_index")
# Create both sub-directories up front. The boolean results are ignored here;
# the /api/health endpoint re-runs the same checks and reports them.
_ensure_dir_writable(UPLOAD_DIR)
_ensure_dir_writable(INDEX_DIR)

# Static file serving
# "static" and "templates" are relative paths, so they resolve against the
# process working directory. They are created first, presumably so the mounts
# below do not fail when the directories are missing.
os.makedirs("static", exist_ok=True)
os.makedirs("templates", exist_ok=True)
app.mount("/static", StaticFiles(directory="static"), name="static")
# NOTE(review): /files serves everything under BASE_DIR, which includes the
# uploads and simple_index directories — confirm that exposure is intended.
app.mount("/files",  StaticFiles(directory=BASE_DIR), name="files")
templates = Jinja2Templates(directory="templates")

# ===== UI / Health =====

@app.get("/", response_class=HTMLResponse)
def ui(request: Request):
    """Render ``index.html`` with the request and the active storage root."""
    context = {"request": request, "base_dir": BASE_DIR}
    return templates.TemplateResponse("index.html", context)

@app.get("/api/health")
def health():
    """Report service identity, resolved directories and live writability.

    The two ``*_writable`` flags are computed on every call by re-running the
    directory write probe, so they reflect the current state of the disk.
    """
    report = {
        "ok": True,
        "service": "IR/PR Co-Pilot Pro",
        "base_dir": BASE_DIR,
        "upload_dir": UPLOAD_DIR,
        "index_dir": INDEX_DIR,
    }
    # Raw configuration values, shown next to the directories actually in use.
    report["data_dir_env"] = settings.DATA_DIR
    report["index_dir_env"] = settings.INDEX_DIR
    for flag, directory in (("upload_writable", UPLOAD_DIR), ("index_writable", INDEX_DIR)):
        report[flag] = _ensure_dir_writable(directory)
    return report

# ===== ingest =====

@app.post("/ingest/edinet")
def ingest_edinet(req: IngestRequest):
    """Ingest EDINET documents for the company and date given in *req*.

    Returns ``{"ok": True, "ingested_chunks": n}`` on success. Any exception
    raised by the ingestion call is converted into an ``ok: False`` payload
    carrying the exception repr and the last 2000 characters of the traceback.
    """
    # Imported lazily; an import failure is not caught here and propagates.
    from rag.ingest import ingest_edinet_for_company

    try:
        chunk_count = ingest_edinet_for_company(req.edinet_code, req.date)
    except Exception as exc:
        trace_tail = "".join(
            traceback.format_exception(type(exc), exc, exc.__traceback__)
        )[-2000:]
        return {"ok": False, "error": repr(exc), "trace": trace_tail}
    else:
        return {"ok": True, "ingested_chunks": chunk_count}

def _safe_filename(name: str) -> str:
    """Reduce a client-supplied upload name to a bare file name.

    Args:
        name: The file name as sent by the client; may be empty or ``None``
            and may contain directory components in either POSIX or Windows
            style.

    Returns:
        The final path component with surrounding whitespace removed, or
        ``"upload.pdf"`` when nothing usable remains (empty input, or the
        special names ``.`` and ``..``).
    """
    # os.path.basename on POSIX does not split on backslashes, so a name such
    # as "C:\\docs\\x.pdf" or "..\\x.pdf" would otherwise pass through whole.
    # Normalising to forward slashes makes the result platform-independent.
    candidate = (name or "").replace("\\", "/")
    candidate = os.path.basename(candidate).strip()
    # "." and ".." are directory references, never valid file names.
    if candidate in ("", ".", ".."):
        return "upload.pdf"
    return candidate

@app.post("/ingest/upload")
async def ingest_upload(files: list[UploadFile] = File(...)):
    """Save uploaded PDF files under UPLOAD_DIR and ingest each of them.

    Files are processed in order. A file whose sanitised name does not end in
    ``.pdf`` is skipped. Processing stops at the first failure (empty file,
    write error, ingestion error) and an ``ok: False`` payload is returned.

    Every response, success or failure, carries ``ingested_chunks`` and
    ``saved`` reflecting the files that were fully processed before the
    response was produced, plus ``base_dir``. Failures caused by an exception
    additionally carry ``trace`` (last 2000 characters of the traceback).
    """
    from rag.ingest import ingest_pdf_bytes

    total = 0
    saved = []

    def _failure(message, exc=None):
        # Single place that builds error payloads so that all of them report
        # the running totals; previously the empty-file path reset them to
        # 0 / [] even when earlier files had already been ingested.
        payload = {"ok": False, "error": message}
        if exc is not None:
            payload["trace"] = "".join(
                traceback.format_exception(type(exc), exc, exc.__traceback__)
            )[-2000:]
        payload["ingested_chunks"] = total
        payload["saved"] = saved
        payload["base_dir"] = BASE_DIR
        return payload

    try:
        # This call also creates UPLOAD_DIR when it is missing, so no further
        # makedirs is needed inside the loop.
        if not _ensure_dir_writable(UPLOAD_DIR):
            raise PermissionError(f"UPLOAD_DIR not writable: {UPLOAD_DIR}")
        if not files:
            return _failure("no files")

        for f in files:
            fname = _safe_filename(f.filename)
            if not fname.lower().endswith(".pdf"):
                # Only PDFs are handled; anything else is skipped silently.
                continue

            blob = await f.read()
            if not blob:
                return _failure(f"empty file: {fname}")

            # NOTE(review): two uploads with the same name overwrite each
            # other on disk — confirm that is acceptable.
            save_path = os.path.join(UPLOAD_DIR, fname)
            try:
                with open(save_path, "wb") as w:
                    w.write(blob)
            except Exception as e:
                return _failure(f"{repr(e)} at path={save_path}", e)

            # URL under the /files mount at which the saved copy is reachable.
            source_url = f"/files/uploads/{fname}"

            try:
                added = ingest_pdf_bytes(title=fname, source_url=source_url, pdf_bytes=blob)
            except Exception as e:
                return _failure(f"{repr(e)} while ingesting file={fname}", e)

            total += added
            saved.append(source_url)

        return {"ok": True, "ingested_chunks": total, "saved": saved, "base_dir": BASE_DIR}

    except Exception as e:
        return _failure(repr(e), e)

# ===== generate =====

@app.post("/generate/all")
def generate_all(req: GenerateRequest):
    """Generate the summary deck and the Q&A sheet for ``req.query``.

    On success the response holds ``/files/...`` URLs for the PPTX and CSV
    written under BASE_DIR, plus the de-duplicated source links from both
    generators. A missing PPTX export dependency yields a dedicated hint;
    any other exception yields an ``ok: False`` payload with the exception
    repr and the last 2000 characters of the traceback.
    """
    # Give an explicit hint when python-pptx is not installed.
    try:
        from export.ppt import build_deck, save_pptx
    except ModuleNotFoundError:
        hint = "python-pptx が未インストールのため PPTX を生成できません。requirements.txt に `python-pptx`, `lxml`, `Pillow` を追加して再起動してください。"
        return {"ok": False, "error": hint, "base_dir": BASE_DIR}

    try:
        from generators.summary import make_summary
        from generators.qa import make_qa
        from export.qa_csv import save_qa_csv

        summary_text, links = make_summary(req.query)

        # Deck section key -> heading looked up in the generated summary text.
        heading_table = (
            ("highlights", "業績ハイライト"),
            ("outlook", "見通し"),
            ("segments", "セグメント"),
            ("finance", "財務"),
            ("shareholder", "株主還元"),
            ("esg", "ESG"),
            ("risks", "リスク"),
        )
        sections = {
            key: _extract_section(summary_text, heading)
            for key, heading in heading_table
        }
        qa_list, links2 = make_qa(req.query, 30)

        ppt_path = os.path.join(BASE_DIR, "ir_summary.pptx")
        csv_path = os.path.join(BASE_DIR, "qa_30.csv")

        deck = build_deck(sections, links)
        save_pptx(deck, ppt_path)
        save_qa_csv(qa_list, links2, csv_path)

        # Turn each output path into a URL under the /files mount.
        urls = {}
        for field, out_path in (("pptx", ppt_path), ("qa_csv", csv_path)):
            relative = os.path.relpath(out_path, BASE_DIR).replace("\\", "/")
            urls[field] = f"/files/{relative}"

        # dict.fromkeys drops duplicates while keeping first-seen order.
        merged_links = list(dict.fromkeys((links or []) + (links2 or [])))

        return {
            "ok": True,
            "pptx": urls["pptx"],
            "qa_csv": urls["qa_csv"],
            "links": merged_links,
            "base_dir": BASE_DIR,
        }
    except Exception as exc:
        trace_tail = "".join(
            traceback.format_exception(type(exc), exc, exc.__traceback__)
        )[-2000:]
        return {"ok": False, "error": repr(exc), "trace": trace_tail, "base_dir": BASE_DIR}

def _extract_section(text: str, head: str):
    """Return the body that follows the ``head:`` label inside *text*.

    The body starts after the label's colon and runs until the next line that
    contains a colon, or to the end of *text*. Both the ASCII colon and the
    full-width colon (U+FF1A) are recognised, for the label and for the
    terminating line alike.

    Args:
        text: The text to search; empty or ``None`` yields ``""``.
        head: The label to look for, matched literally.

    Returns:
        The stripped section body, or ``""`` when the label is not present.
    """
    import re

    if not text:
        return ""
    # The original class listed the same colon twice; the second member is
    # spelled as an escape so the full-width colon survives any re-encoding.
    colon = r"[:\uff1a]"
    # re.escape keeps regex metacharacters in the label from being interpreted.
    pattern = re.escape(head) + colon + r"\s*(.*?)(?:\n[^\n]*" + colon + r"|\Z)"
    found = re.search(pattern, text, re.S)
    return found.group(1).strip() if found else ""

from starlette.requests import Request as SRequest
@app.exception_handler(Exception)
async def _all_exception_handler(request: SRequest, exc: Exception):
    """Convert an otherwise unhandled exception into a JSON error payload.

    The response is sent with HTTP status 200; the failure is signalled by
    ``ok: False`` in the body, alongside the exception repr and the last
    2000 characters of the formatted traceback.
    """
    formatted = traceback.format_exception(type(exc), exc, exc.__traceback__)
    payload = {
        "ok": False,
        "error": repr(exc),
        "trace": "".join(formatted)[-2000:],
        "base_dir": BASE_DIR,
    }
    return JSONResponse(status_code=200, content=payload)