from datetime import datetime
from typing import Optional

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from google import genai
from dotenv import load_dotenv
import os
import json
import logging
import re
import mistune
import requests
from data_service import DataService

# Load environment variables (local .env plus project-level .env.local)
load_dotenv()
load_dotenv("../.env.local")

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Configure Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="3GPP Innovation Backend")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # allow all origins (covers the React dev server); tighten for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Initialize DataService
data_service = DataService()


def ask_gemini(prompt, content):
    """Send prompt + content to Gemini. If the combined length exceeds MAX_LEN,
    only the first chunk of the content is analyzed; the remainder is dropped
    (processing it is intentionally disabled below)."""
    MAX_LEN = 10000

    if len(prompt) + len(content) <= MAX_LEN:
        client = genai.Client(api_key=GEMINI_API_KEY)
        response = client.models.generate_content(
            model="gemma-3-27b-it",
            contents=prompt + "\n\n" + content
        )
        return response.text

    # Content too long: keep only the first chunk that fits alongside the prompt.
    chunk = content[:MAX_LEN - len(prompt)]
    rest = content[MAX_LEN - len(prompt):]

    first_answer = ask_gemini(prompt, chunk)
    # remaining_answer = ask_gemini(prompt, rest)  # disabled: the remainder is not analyzed

    return first_answer  # + "\n\n" + remaining_answer
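
# Illustrative sketch of the truncation behavior above (hypothetical call):
#   ask_gemini("Summarize:", "x" * 50_000)
# sends only the first ~10,000 characters (MAX_LEN minus the prompt length)
# to the model; the remaining ~40,000 characters are silently dropped.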

PROCESS_PROMPT = """
Task:
Using the text provided, create chunks that are dense in relevant information
and minimize near-duplicate or loosely related passages. Then provide a
paragraph on what is new in this document, delimited by the SUGGESTION START
and SUGGESTION END markers.
"""

def format_answer(answer):
    return (
        f"We obtained the following methodology: {answer['methodology']}\n\n"
        f"The context is: {answer['context']}\n\n"
        f"The problem description is: {answer['problem']}"
    )

def extract_json(text: str) -> dict:
    """Extract the JSON object embedded in a free-text model response
    (greedy match from the first '{' to the last '}') and parse it."""
    match = re.search(r'\{.*\}', text, re.DOTALL)
    if not match:
        raise ValueError("No JSON object found in the response")
    return json.loads(match.group())
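
# Example (illustrative values only):
#   extract_json('Sure! {"methodology": "M", "context": "C", "problem": "P"}')
# returns {'methodology': 'M', 'context': 'C', 'problem': 'P'}; a response
# containing no braces raises ValueError.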

# --- Pydantic Models ---

class ProcessRequest(BaseModel):
    file_id: str
    filename: str
    working_group: str
    meeting: str
    type: str # doc.Type
    status: str # doc["TDoc Status"]
    agenda_item: str # doc["Agenda item description"]
    url: str

class InnovationResponse(BaseModel):
    id: str
    file_name: str
    answer: str
    classification: str

class PatternResponse(BaseModel):
    pattern_id: int
    pattern_name: str
    prompt: str

class AnalyzeRequest(BaseModel):
    file_id: Optional[str] = None
    text: Optional[str] = None
    pattern_id: int

class AnalyzeResponse(BaseModel):
    id: int
    file_name: str
    content: str
    methodology: str
    context: str
    problem: str
    pattern_name: str

class ClassificationRequest(BaseModel):
    result_id: int
    classification: str

class ResultResponse(BaseModel):
    id: int
    file_name: str
    content: str
    classification: str
    pattern_name: str
    methodology: str
    context: str
    problem: str

# --- Helper Functions ---

def fetch_text_content(req: AnalyzeRequest):
    """
    fetches text content from request or database.
    """
    if req.text:
        return req.text
    elif req.file_id:
        content = data_service.get_file_content(req.file_id)
        if content:
            return content
        else:
            refined = data_service.get_refined_output(req.file_id)
            if refined:
                return refined
    return None

# --- API Endpoints ---

@app.get("/get_all")
def get_all():
    return data_service.get_all_files()

@app.get("/patterns", response_model=list[PatternResponse])
def get_patterns():
    return data_service.get_patterns()

class PatternRequest(BaseModel):
    pattern_name: str
    prompt: str

@app.post("/patterns", response_model=PatternResponse)
def create_pattern(req: PatternRequest):
    try:
        pattern_id = data_service.add_pattern(req.pattern_name, req.prompt)
        return {
            "pattern_id": pattern_id,
            "pattern_name": req.pattern_name,
            "prompt": req.prompt
        }
    except Exception as e:
        logger.error(f"Error creating pattern: {e}")
        raise HTTPException(status_code=500, detail=str(e))
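
# Example /patterns call (illustrative values; the port assumes a local dev server):
#   curl -X POST http://localhost:8000/patterns \
#        -H "Content-Type: application/json" \
#        -d '{"pattern_name": "Innovation scan", "prompt": "Identify the methodology..."}'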

@app.put("/patterns/{pattern_id}", response_model=PatternResponse)
def update_pattern(pattern_id: int, req: PatternRequest):
    try:
        updated = data_service.update_pattern(pattern_id, req.pattern_name, req.prompt)
        if not updated:
             raise HTTPException(status_code=404, detail="Pattern not found")

        return {
            "pattern_id": pattern_id,
            "pattern_name": req.pattern_name,
            "prompt": req.prompt
        }
    except HTTPException as he:
        raise he
    except Exception as e:
        logger.error(f"Error updating pattern: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/analyze", response_model=AnalyzeResponse)
async def analyze_content(req: AnalyzeRequest):
    print("Start of analyse")
    
    try:
        # 1. Check for existing result (caching strategy)
        existing_result, refined_id, file_name = data_service.get_existing_result(req.file_id)
        
        if existing_result:
            # Cache Hit
            return {
                "id": existing_result['result_id'],
                "file_name": file_name,
                "content": existing_result['content'],
                "methodology": existing_result['methodology'],
                "context": existing_result['context'],
                "problem": existing_result['problem'],
                "pattern_name": existing_result['pattern_name']
            }

        # 2. Cache miss: perform a new analysis
        logger.info("Performing new analysis")
        text_content = fetch_text_content(req)
        
        if not text_content:
            raise HTTPException(status_code=400, detail="No content found to analyze")

        pattern = data_service.get_pattern(req.pattern_id)
        if not pattern:
            raise HTTPException(status_code=404, detail="Pattern not found")

        pattern_name = pattern['pattern_name']
        pattern_prompt = pattern['prompt']
        
        # Call LLM
        response = ask_gemini(f"Pattern: {pattern_name}\nPrompt: {pattern_prompt}\n\nContext:\n", text_content)
        json_response = extract_json(response)
        answer = format_answer(json_response)
        
        methodology = json_response["methodology"]
        context = json_response["context"]
        problem = json_response["problem"]

        # Save the result. get_existing_result normally returns refined_id even on a
        # cache miss; if it is missing (the file was never refined), look it up again.
        if not refined_id and req.file_id:
            ref_row = data_service.get_refined_by_file_id(req.file_id)
            if ref_row:
                refined_id = ref_row["refined_id"]

        result_id = data_service.add_result(req.pattern_id, refined_id, answer, methodology, context, problem)

        print("End of analyse")
        return {
            "id": result_id,
            "file_name": file_name,
            "content": answer,
            "methodology": methodology,
            "context": context,
            "problem": problem,
            "pattern_name": pattern_name
        }

    except HTTPException:
        # Re-raise intentional HTTP errors (400/404) instead of converting them to 500s.
        raise
    except Exception as e:
        logger.error(f"Error during analysis: {e}")
        raise HTTPException(status_code=500, detail=str(e))
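
# Example /analyze call (illustrative ids; the port assumes a local dev server):
#   curl -X POST http://localhost:8000/analyze \
#        -H "Content-Type: application/json" \
#        -d '{"file_id": "S2-2400001", "pattern_id": 1}'
# Raw text can be analyzed instead by sending {"text": "...", "pattern_id": 1}.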


@app.post("/classify")
def classify_result(req: ClassificationRequest):
    try:
        updated = data_service.update_classification(req.result_id, req.classification)
        if not updated:
            raise HTTPException(status_code=404, detail="Result not found")
        return {"id": req.result_id, "status": "updated"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error updating classification: {e}")
        raise HTTPException(status_code=500, detail=str(e))
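
# Example /classify call (illustrative id and label):
#   curl -X POST http://localhost:8000/classify \
#        -H "Content-Type: application/json" \
#        -d '{"result_id": 1, "classification": "INNOVATIVE"}'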

@app.get("/results", response_model=list[ResultResponse])
def get_results():
    try:
        return data_service.get_all_results_joined()
    except Exception as e:
        logger.error(f"Error fetching results: {e}")
        return []


@app.post("/process", response_model=InnovationResponse)
async def process_document(req: ProcessRequest):
    try:
        existing_content = data_service.get_file_content(req.file_id)
        
        text_content = ""
        content = ""

        if existing_content:
            logger.info(f"File {req.file_id} found in DB.")
            text_content = existing_content
        else:
            try:
                logger.info(f"Fetching content from {req.url}")
                hf_response = requests.post(
                    'https://organizedprogrammers-docxtract.hf.space/docs/extract_text_from_url',
                    json={"url": req.url},
                    timeout=30
                )

                if hf_response.status_code == 200:
                    data = hf_response.json()
                    text_content = data.get('text') or data.get('content') or ""
                else:
                    logger.error(f"Failed to fetch content from HF: {hf_response.text}")
                    text_content = "Extraction failed."
            except Exception as e:
                logger.error(f"Error fetching content: {e}")
                text_content = "Extraction error."

            logger.debug(f"Process request: {req}")
            # Add file to DataService
            data_service.add_file({
                "file_id": req.file_id,
                "working_group": req.working_group,
                "meeting": req.meeting,
                "type": req.type,
                "status": req.status,
                "agenda_item": req.agenda_item,
                "content": text_content,
                "filename": req.filename,
                "timestamp": datetime.now().isoformat()
            })

        refined_output = data_service.get_refined_output(req.file_id)
        
        md = mistune.create_markdown()
        if refined_output:
            content = md(refined_output)
        else:
            logger.debug("No refined output cached; sending extracted text to the model")
            answer = ask_gemini(PROCESS_PROMPT, text_content)

            content = md(answer)

            data_service.add_refined(req.file_id, answer)
            
        return {
            "id": req.file_id,
            "file_name": req.filename,
            "answer": content,
            "classification": "UNCLASSIFIED",
        }

    except Exception as e:
        logger.error(f"Error processing: {e}")
        raise HTTPException(status_code=500, detail=str(e))
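
# Example /process call (illustrative values; field names match ProcessRequest,
# the URL is a placeholder):
#   curl -X POST http://localhost:8000/process \
#        -H "Content-Type: application/json" \
#        -d '{"file_id": "S2-2400001", "filename": "S2-2400001.docx",
#             "working_group": "SA2", "meeting": "SA2#160", "type": "pCR",
#             "status": "available", "agenda_item": "AI/ML for NG-RAN",
#             "url": "https://example.org/S2-2400001.zip"}'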


# Serve Static Files for Deployment (must be after API routes)
static_dir = "static"
if os.path.exists(static_dir):
    # Mount assets folder
    if os.path.exists(os.path.join(static_dir, "assets")):
        app.mount("/assets", StaticFiles(directory=os.path.join(static_dir, "assets")), name="assets")
    
    # Catch-all for SPA and other static files at root
    @app.get("/{full_path:path}")
    async def serve_frontend(full_path: str):
        # Check if it's a specific file that exists
        file_path = os.path.join(static_dir, full_path)
        if os.path.isfile(file_path):
            return FileResponse(file_path)
            
        # Default to index.html for SPA routing
        return FileResponse(os.path.join(static_dir, "index.html"))
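

if __name__ == "__main__":
    # Local development entry point: a minimal sketch assuming uvicorn is
    # installed (it is not referenced elsewhere in this file); the host and
    # port are illustrative defaults.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)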