dVd5t committed on
Commit
4177824
·
verified ·
1 Parent(s): 2941755

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -13
app.py CHANGED
@@ -34,6 +34,14 @@ class ExtractRequest(BaseModel):
34
  llm_provider: str
35
  llm_api_key: Optional[str] = None
36
 
 
 
 
 
 
 
 
 
37
  @app.get("/", response_class=HTMLResponse)
38
  async def root():
39
  return """
@@ -41,30 +49,99 @@ async def root():
41
  <head>
42
  <title>Aging Theory Analyzer API</title>
43
  <style>
44
- body { font-family: Arial; max-width: 800px; margin: 50px auto; padding: 20px; }
 
 
 
 
 
 
45
  h1 { color: #2c3e50; }
46
- .status { color: #27ae60; font-weight: bold; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  </style>
48
  </head>
49
  <body>
50
  <h1>🧬 Aging Theory Analyzer API</h1>
51
  <p class="status">βœ… Backend is running on Hugging Face Spaces!</p>
52
- <h2>Available Endpoints:</h2>
53
- <ul>
54
- <li><a href="/docs">/docs</a> - API documentation</li>
55
- <li><a href="/api/health">/api/health</a> - Health check</li>
56
- </ul>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  </body>
58
  </html>
59
  """
60
 
61
  @app.get("/api/health")
62
  async def health():
63
- return {"status": "ok", "version": "1.0.0", "timestamp": time.time()}
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  @app.post("/api/collect")
66
  async def collect_papers(request: CollectRequest):
67
- logger.info(f"Collection: {len(request.queries)} queries")
 
68
 
69
  all_papers = []
70
  papers_per_query = request.max_papers // len(request.queries) if request.queries else request.max_papers
@@ -75,9 +152,11 @@ async def collect_papers(request: CollectRequest):
75
  try:
76
  papers = await api.search_papers(query, request.year_from, request.year_to, papers_per_query)
77
  all_papers.extend(papers)
 
78
  except Exception as e:
79
  logger.error(f"Error: {e}")
80
 
 
81
  seen = set()
82
  unique_papers = []
83
  for paper in all_papers:
@@ -86,21 +165,77 @@ async def collect_papers(request: CollectRequest):
86
  seen.add(key)
87
  unique_papers.append(paper)
88
 
 
 
89
  return {
90
  "papers": [asdict(p) for p in unique_papers],
91
  "total": len(unique_papers),
92
- "api_calls": len(unique_papers) // 100 + 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  }
94
 
95
  @app.post("/api/extract")
96
  async def extract_data(request: ExtractRequest):
97
- logger.info(f"Extraction: {len(request.papers)} papers")
 
 
 
 
 
 
98
 
99
  if not request.llm_api_key:
100
  raise HTTPException(status_code=400, detail="API key required")
101
 
 
 
 
 
102
  extractor = GroqExtractor(request.llm_api_key)
103
  results = []
 
104
 
105
  for idx, paper in enumerate(request.papers, 1):
106
  try:
@@ -108,11 +243,13 @@ async def extract_data(request: ExtractRequest):
108
  results.append(result)
109
  logger.info(f"[{idx}/{len(request.papers)}] βœ“")
110
  except Exception as e:
 
111
  logger.error(f"[{idx}] Failed: {e}")
112
 
113
  return {
114
  "results": results,
115
  "total": len(results),
116
  "theories_found": sum(1 for r in results if r.get('q2') == 'Yes'),
117
- "failed": len(request.papers) - len(results)
118
- }
 
 
34
  llm_provider: str
35
  llm_api_key: Optional[str] = None
36
 
37
class BatchExtractRequest(BaseModel):
    """Request body for processing a single batch of papers."""

    # Papers belonging to this batch, each supplied as a plain dict.
    papers: List[dict]
    # Name of the LLM provider chosen by the client.
    llm_provider: str
    # API key for the LLM provider; required here (no default value).
    llm_api_key: str
    # Position of this batch, as reported by the client.
    batch_number: int
    # Total number of batches in the run, as reported by the client.
    total_batches: int
44
+
45
  @app.get("/", response_class=HTMLResponse)
46
  async def root():
47
  return """
 
49
  <head>
50
  <title>Aging Theory Analyzer API</title>
51
  <style>
52
+ body {
53
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
54
+ max-width: 800px;
55
+ margin: 50px auto;
56
+ padding: 20px;
57
+ background: #f8f9fa;
58
+ }
59
  h1 { color: #2c3e50; }
60
+ .status { color: #27ae60; font-weight: bold; font-size: 1.2em; }
61
+ .endpoint {
62
+ background: white;
63
+ padding: 15px;
64
+ margin: 10px 0;
65
+ border-radius: 8px;
66
+ border-left: 4px solid #3498db;
67
+ }
68
+ a { color: #3498db; text-decoration: none; }
69
+ a:hover { text-decoration: underline; }
70
+ .features {
71
+ background: #e8f4f8;
72
+ padding: 15px;
73
+ border-radius: 8px;
74
+ margin: 20px 0;
75
+ }
76
  </style>
77
  </head>
78
  <body>
79
  <h1>🧬 Aging Theory Analyzer API</h1>
80
  <p class="status">βœ… Backend is running on Hugging Face Spaces!</p>
81
+
82
+ <div class="features">
83
+ <h3>✨ Features:</h3>
84
+ <ul>
85
+ <li>πŸ” Paper collection from Semantic Scholar</li>
86
+ <li>πŸ€– LLM-powered Q1-Q9 extraction with Groq</li>
87
+ <li>πŸ“¦ Batch processing for large datasets (10 papers/batch)</li>
88
+ <li>⚑ Automatic rate limiting</li>
89
+ </ul>
90
+ </div>
91
+
92
+ <h2>πŸ“š Available Endpoints:</h2>
93
+ <div class="endpoint">
94
+ <strong>GET</strong> <a href="/docs">/docs</a> - Interactive API documentation (Swagger UI)
95
+ </div>
96
+ <div class="endpoint">
97
+ <strong>GET</strong> <a href="/api/health">/api/health</a> - Health check endpoint
98
+ </div>
99
+ <div class="endpoint">
100
+ <strong>POST</strong> /api/collect - Collect papers from Semantic Scholar
101
+ </div>
102
+ <div class="endpoint">
103
+ <strong>POST</strong> /api/extract/batch - Extract Q1-Q9 from one batch (10 papers)
104
+ </div>
105
+ <div class="endpoint">
106
+ <strong>POST</strong> /api/extract - Legacy: Extract all papers at once (not recommended for >50 papers)
107
+ </div>
108
+
109
+ <h2>πŸš€ Quick Start:</h2>
110
+ <ol>
111
+ <li>Copy this URL: <code id="url"></code></li>
112
+ <li>Paste in frontend "Backend URL" field</li>
113
+ <li>Get free Groq API key: <a href="https://console.groq.com/keys" target="_blank">console.groq.com/keys</a></li>
114
+ <li>Add queries and start analyzing!</li>
115
+ </ol>
116
+
117
+ <p><small>Version 1.0.0 | Powered by FastAPI + Groq LLM (Llama 3.1 70B) + Semantic Scholar API</small></p>
118
+
119
+ <script>
120
+ document.getElementById('url').textContent = window.location.origin;
121
+ </script>
122
  </body>
123
  </html>
124
  """
125
 
126
  @app.get("/api/health")
127
  async def health():
128
+ """Health check endpoint"""
129
+ return {
130
+ "status": "ok",
131
+ "version": "1.0.0",
132
+ "timestamp": time.time(),
133
+ "message": "Aging Theory Analyzer Backend is running",
134
+ "features": {
135
+ "paper_collection": "Semantic Scholar API",
136
+ "llm_extraction": "Groq (Llama 3.1 70B)",
137
+ "batch_processing": "10 papers per batch"
138
+ }
139
+ }
140
 
141
  @app.post("/api/collect")
142
  async def collect_papers(request: CollectRequest):
143
+ """Collect papers from Semantic Scholar"""
144
+ logger.info(f"Collection: {len(request.queries)} queries, {request.year_from}-{request.year_to}")
145
 
146
  all_papers = []
147
  papers_per_query = request.max_papers // len(request.queries) if request.queries else request.max_papers
 
152
  try:
153
  papers = await api.search_papers(query, request.year_from, request.year_to, papers_per_query)
154
  all_papers.extend(papers)
155
+ logger.info(f"Collected {len(papers)} for '{query}'")
156
  except Exception as e:
157
  logger.error(f"Error: {e}")
158
 
159
+ # Deduplicate
160
  seen = set()
161
  unique_papers = []
162
  for paper in all_papers:
 
165
  seen.add(key)
166
  unique_papers.append(paper)
167
 
168
+ logger.info(f"Collection complete: {len(unique_papers)} unique papers")
169
+
170
  return {
171
  "papers": [asdict(p) for p in unique_papers],
172
  "total": len(unique_papers),
173
+ "api_calls": len(unique_papers) // 100 + 1,
174
+ "recommended_batch_size": 10
175
+ }
176
+
177
@app.post("/api/extract/batch")
async def extract_batch(request: BatchExtractRequest):
    """Extract Q1-Q9 from ONE batch of papers (recommended: 10 per batch).

    The frontend should call this endpoint repeatedly for large datasets:
    split the papers into batches of 10, call this endpoint once per batch,
    and update the progress bar after each response.

    Args:
        request: Batch payload. ``llm_provider`` is only logged here; the
            extraction itself always goes through ``GroqExtractor``.
            ``batch_number`` / ``total_batches`` are echoed back unchanged.

    Returns:
        A dict with the echoed batch position, the successful ``results``,
        the ``processed`` and ``failed`` counts, per-paper failure details
        in ``failed_papers`` and ``theories_found`` (number of results whose
        ``q2`` equals ``'Yes'``).

    Raises:
        HTTPException: 400 when ``llm_api_key`` is empty.
    """
    batch_size = len(request.papers)
    logger.info(f"Batch {request.batch_number}/{request.total_batches}: {batch_size} papers with {request.llm_provider}")

    if not request.llm_api_key:
        raise HTTPException(status_code=400, detail="LLM API key required")

    extractor = GroqExtractor(request.llm_api_key)
    results = []
    failed = []

    for idx, paper in enumerate(request.papers, 1):
        # Keep the try body limited to the extraction call so that a problem
        # while logging cannot turn an already-successful paper into a
        # failure (which would count it in both `results` and `failed`).
        try:
            result = await extractor.extract(paper)
        except Exception as e:
            # Best effort: record the failure and continue with the batch.
            failed.append({
                "paper_id": paper.get('id', 'unknown'),
                "paper_title": paper.get('title', 'unknown'),
                "error": str(e)
            })
            logger.error(f"Batch {request.batch_number}: [{idx}/{batch_size}] ✗ {str(e)[:100]}")
        else:
            results.append(result)
            # The title may be missing or None in client-supplied dicts.
            title = str(paper.get('title') or 'unknown')[:50]
            logger.info(f"Batch {request.batch_number}: [{idx}/{batch_size}] ✓ {title}")

    return {
        "batch_number": request.batch_number,
        "total_batches": request.total_batches,
        "results": results,
        "processed": len(results),
        "failed": len(failed),
        "failed_papers": failed,
        "theories_found": sum(1 for r in results if r.get('q2') == 'Yes')
    }
218
 
219
  @app.post("/api/extract")
220
  async def extract_data(request: ExtractRequest):
221
+ """
222
+ Legacy endpoint: Extract all papers at once
223
+
224
+ ⚠️ NOT RECOMMENDED for large datasets (>50 papers)
225
+ Use /api/extract/batch instead for better reliability
226
+ """
227
+ logger.info(f"Extraction: {len(request.papers)} papers with {request.llm_provider}")
228
 
229
  if not request.llm_api_key:
230
  raise HTTPException(status_code=400, detail="API key required")
231
 
232
+ # Warn if too many papers
233
+ if len(request.papers) > 50:
234
+ logger.warning(f"Large dataset detected ({len(request.papers)} papers). Consider using /api/extract/batch")
235
+
236
  extractor = GroqExtractor(request.llm_api_key)
237
  results = []
238
+ failed_count = 0
239
 
240
  for idx, paper in enumerate(request.papers, 1):
241
  try:
 
243
  results.append(result)
244
  logger.info(f"[{idx}/{len(request.papers)}] βœ“")
245
  except Exception as e:
246
+ failed_count += 1
247
  logger.error(f"[{idx}] Failed: {e}")
248
 
249
  return {
250
  "results": results,
251
  "total": len(results),
252
  "theories_found": sum(1 for r in results if r.get('q2') == 'Yes'),
253
+ "failed": failed_count,
254
+ "warning": "Consider using /api/extract/batch for large datasets" if len(request.papers) > 50 else None
255
+ }