File size: 6,103 Bytes
31e3146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238

from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

# Import both services.
#
# Each service is optional. If its module cannot be imported, the failure is
# reported on stdout and every name the import would have provided is bound to
# None, so the rest of this module can test for availability instead of
# tripping over an unbound name.
try:
    from split import (
        app as ingestion_app,
        ingest_pdf,
        get_stats as get_ingestion_stats,
    )
except Exception as e:  # import-time boundary: the module may fail for any reason
    print(f"⚠️ Warning: Could not load ingestion service: {e}")
    ingestion_app = None
    ingest_pdf = None
    get_ingestion_stats = None
else:
    print("✅ Loaded ingestion service (split.py)")

try:
    from query_service import (
        app as query_app,
        query_rag,
        query_with_details,
        get_stats as get_query_stats,
    )
except Exception as e:  # import-time boundary: the module may fail for any reason
    print(f"⚠️ Warning: Could not load query service: {e}")
    query_app = None
    query_rag = None
    query_with_details = None
    get_query_stats = None
else:
    print("✅ Loaded query service (query_service.py)")


# Create main application: the single FastAPI instance on which every route in
# this module is registered. Swagger UI is served at /docs and ReDoc at /redoc.
app = FastAPI(
    title="🚀 Multimodal RAG - Combined Service",
    description="Unified service for PDF ingestion and multimodal querying",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# Add CORS middleware: every origin, method and header is accepted.
# NOTE(review): FastAPI's CORS documentation states that wildcard values are
# not meant to be combined with allow_credentials=True. Confirm whether
# credentialed cross-origin requests are actually needed; if not, drop
# allow_credentials, otherwise list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# ==================== HOME & HEALTH ENDPOINTS ====================

@app.get("/")
async def home():
    """Describe the service: status, loaded services, routes and features.

    Returns:
        A dict with a status message, an "available"/"unavailable" flag for
        the ingestion and query services (based on whether their modules were
        imported), a static listing of the routes, and a static feature list.
    """
    return {
        "message": "✅ Multimodal RAG Service is running",
        "status": "healthy",
        "services": {
            "ingestion": "available" if ingestion_app else "unavailable",
            "query": "available" if query_app else "unavailable"
        },
        # Static listing; it is not derived from the registered routes, so it
        # must be kept in sync by hand when endpoints change.
        "endpoints": {
            "ingestion": {
                "ingest_pdf": "POST /ingest",
                "ingestion_stats": "GET /ingest/stats"
            },
            "query": {
                "query": "POST /query?question=YOUR_QUESTION&k=5",
                "query_detailed": "POST /query/details?question=YOUR_QUESTION&k=5",
                "query_stats": "GET /query/stats"
            },
            "documentation": {
                "swagger": "/docs",
                "redoc": "/redoc"
            }
        },
        "features": [
            "PDF text extraction",
            "Table extraction",
            "Image extraction",
            "Multimodal summarization",
            "Vector similarity search",
            "Context-aware answering"
        ]
    }


@app.get("/health")
async def health_check():
    """Report a fixed "healthy" status plus which services were imported.

    Returns:
        A dict whose "services" entry maps "ingestion" and "query" to booleans
        that are True when the corresponding module loaded at import time.
    """
    loaded = {
        "ingestion": ingestion_app is not None,
        "query": query_app is not None,
    }
    return {"status": "healthy", "services": loaded}


# ==================== INGESTION ENDPOINTS ====================

@app.post("/ingest")
async def ingest_document(file: UploadFile = File(...)):
    """
    Accept an uploaded file and hand it to the ingestion service.

    The upload is forwarded unchanged to the ingestion module's `ingest_pdf`
    coroutine and its result is returned as-is. According to the original
    endpoint description, ingestion extracts text, tables and images from the
    PDF, summarizes them and stores them in the vectorstore.

    Returns:
        Whatever `ingest_pdf` returns, or a 503 JSON error when the ingestion
        service could not be loaded.
    """
    if ingestion_app is not None:
        return await ingest_pdf(file)

    unavailable = {"error": "Ingestion service not available"}
    return JSONResponse(status_code=503, content=unavailable)


@app.get("/ingest/stats")
async def ingestion_stats():
    """Return the ingestion service's statistics.

    Returns:
        The result of the ingestion module's stats helper, or a 503 JSON error
        when the ingestion service could not be loaded.
    """
    import inspect

    if ingestion_app is None:
        return JSONResponse(
            status_code=503,
            content={"error": "Ingestion service not available"}
        )

    result = get_ingestion_stats()
    # The helper comes from another module and may be a coroutine function
    # (the sibling ingest helper is awaited); returning an un-awaited coroutine
    # would not be serializable, so resolve it here when needed.
    if inspect.isawaitable(result):
        result = await result
    return result


# ==================== QUERY ENDPOINTS ====================

@app.post("/query")
async def query_documents(question: str, k: int = 5):
    """
    Ask the RAG system a question.

    Args:
        question: The question to ask.
        k: Number of documents to retrieve (default: 5).

    Returns:
        The result of the query module's `query_rag` coroutine, or a 503 JSON
        error when the query service could not be loaded.
    """
    if query_app is not None:
        return await query_rag(question, k)

    unavailable = {"error": "Query service not available"}
    return JSONResponse(status_code=503, content=unavailable)


@app.post("/query/details")
async def query_documents_detailed(question: str, k: int = 5):
    """
    Ask the RAG system a question and request detailed document information.

    Args:
        question: The question to ask.
        k: Number of documents to retrieve (default: 5).

    Returns:
        The result of the query module's `query_with_details` coroutine, or a
        503 JSON error when the query service could not be loaded.
    """
    if query_app is not None:
        return await query_with_details(question, k)

    unavailable = {"error": "Query service not available"}
    return JSONResponse(status_code=503, content=unavailable)


@app.get("/query/stats")
async def query_stats():
    """Return the query service's statistics.

    Returns:
        The result of the query module's stats helper, or a 503 JSON error
        when the query service could not be loaded.
    """
    import inspect

    if query_app is None:
        return JSONResponse(
            status_code=503,
            content={"error": "Query service not available"}
        )

    result = get_query_stats()
    # The helper comes from another module and may be a coroutine function
    # (the sibling query helpers are awaited); returning an un-awaited
    # coroutine would not be serializable, so resolve it here when needed.
    if inspect.isawaitable(result):
        result = await result
    return result


# ==================== COMBINED STATS ENDPOINT ====================

@app.get("/stats")
async def combined_stats():
    """Return a service-level summary plus the query service's statistics.

    The response always contains ``"service": "combined"`` and
    ``"status": "healthy"``. When the query service is loaded, the result of
    its stats helper is added under ``"vectorstore"``; if that call raises, the
    error text is reported under ``"vectorstore_error"`` instead. The ingestion
    service's stats helper is not consulted here.
    """
    import inspect

    stats = {
        "service": "combined",
        "status": "healthy"
    }

    if not query_app:
        return stats

    try:
        query_stats_data = get_query_stats()
        # The helper may be a coroutine function; resolve it inside the try so
        # a failure while awaiting is reported like any other stats error.
        if inspect.isawaitable(query_stats_data):
            query_stats_data = await query_stats_data
    except Exception as e:
        # Best effort: a stats failure must not break the summary endpoint.
        stats["vectorstore_error"] = str(e)
    else:
        stats["vectorstore"] = query_stats_data

    return stats




@app.on_event("startup")
async def startup_event():
    """Print a startup banner showing which services were loaded.

    The address and port in the banner are hard-coded text; they match the
    values used when this module is run as a script but are not read from the
    running server.
    """
    print("\n" + "="*50)
    print("🚀 Multimodal RAG Service Starting...")
    print("="*50)

    if ingestion_app:
        print("✅ Ingestion service: READY")
    else:
        print("⚠️  Ingestion service: NOT LOADED")

    if query_app:
        print("✅ Query service: READY")
    else:
        print("⚠️  Query service: NOT LOADED")

    print("="*50)
    print("📡 Service available at: http://0.0.0.0:7860")
    print("📚 API Documentation: http://0.0.0.0:7860/docs")
    print("="*50 + "\n")


if __name__ == "__main__":
    # Run the combined app with uvicorn when this file is executed directly,
    # listening on all interfaces at port 7860.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )