sciresearch / app.py
sccastillo's picture
document output
5259707
import asyncio
import html
import os

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
from fastapi.responses import HTMLResponse
# Import OpenAI dependencies
from openai import OpenAI
from pydantic import BaseModel

# Import ResearchTeam
from research_team import create_research_team
# Load environment variables (the GEAI_* settings are read later via os.getenv)
load_dotenv()
# Pydantic models
class QuestionRequest(BaseModel):
    """Request body for the question-answering JSON endpoint."""

    # Question text forwarded to the LLM
    question: str
class DocumentRequest(BaseModel):
    """Request body for the research-team document processing JSON endpoint."""

    # Raw document text handed to the research team
    document_content: str
class GenerateResponse(BaseModel):
    """Response body carrying the text generated by the LLM."""

    # Generated answer text
    text: str
    # Outcome marker; defaults to "success"
    status: str = "success"
class ResearchResponse(BaseModel):
    """Response body carrying the research team's result dictionary."""

    # Result returned by the research team's process_document call
    result: dict
    # Outcome marker; defaults to "success"
    status: str = "success"
# Create the FastAPI application; every route below is registered on this instance
app = FastAPI(
    title="SciResearch API",
    description="Scientific Research FastAPI application with OpenAI integration and Research Team for Claims Anchoring",
    version="1.0.0"
)
# Module-level cache for the ResearchTeam; populated on first request
research_team = None


def get_research_team():
    """Return the cached ResearchTeam, creating it on the first call."""
    global research_team
    if research_team is not None:
        return research_team
    research_team = create_research_team()
    return research_team
def get_html_with_response(question: str, answer: str, status: str, error_msg: str = None):
    """Render the full page for the AI question form, including its outcome.

    Args:
        question: The submitted question; forwarded unchanged as the form's
            pre-filled value.
        answer: Text produced by the model. Used only when ``status`` is
            ``"success"`` and the text is non-empty.
        status: ``"success"`` or ``"error"``; any other value renders the
            page without a response panel.
        error_msg: Message shown in the error panel; a generic message is
            used when it is empty or ``None``.

    Returns:
        The complete HTML document as a string.
    """
    response_content = ""
    if status == "success" and answer:
        # The answer is model output, not trusted markup: escape it first and
        # only then turn newlines into <br> so the line breaks survive.
        safe_answer = html.escape(answer).replace("\n", "<br>")
        response_content = f'''
<div class="response-section">
<h4>🤖 AI Response:</h4>
<div style="margin-bottom: 15px;">
{safe_answer}
</div>
</div>
'''
    elif status == "error":
        # Exception text may echo user input, so it is escaped as well.
        safe_error = html.escape(str(error_msg or "Unknown error occurred"))
        response_content = f'''
<div class="response-section error">
<h4>❌ Error:</h4>
<div style="color: red;">
{safe_error}
</div>
</div>
'''
    return get_base_html("ai-generator", question, "", response_content, "")
def get_html_with_research_response(document: str, result: dict, status: str, error_msg: str = None):
    """Render the full page for the research-team form, including its outcome.

    Args:
        document: The submitted document text; forwarded unchanged as the
            textarea's pre-filled value.
        result: Research result dictionary, rendered through
            ``format_research_results`` when ``status`` is ``"success"`` and
            the dictionary is non-empty.
        status: ``"success"`` or ``"error"``; any other value renders the
            page without a response panel.
        error_msg: Message shown in the error panel; a generic message is
            used when it is empty or ``None``.

    Returns:
        The complete HTML document as a string.
    """
    response_content = ""
    if status == "success" and result:
        response_content = f'''
<div class="response-section">
<h4>📊 Research Team Results:</h4>
{format_research_results(result)}
</div>
'''
    elif status == "error":
        # Exception text may echo parts of the submitted document, so it must
        # not reach the page as raw markup.
        safe_error = html.escape(str(error_msg or "Unknown error occurred"))
        response_content = f'''
<div class="response-section error">
<h4>❌ Error:</h4>
<div style="color: red;">
{safe_error}
</div>
</div>
'''
    return get_base_html("research-team", "", document, "", response_content)
# Only this many extracted claims are listed; the rest are summarised as a count.
_MAX_CLAIMS_SHOWN = 10


def _as_text(value, default: str = "") -> str:
    """Coerce *value* to ``str``, substituting *default* when it is ``None``."""
    return default if value is None else str(value)


def _esc(value) -> str:
    """Return *value* as text that is safe to place inside HTML content."""
    return html.escape(str(value))


def _truncate(text: str, limit: int) -> str:
    """Cut *text* to *limit* characters, appending ``...`` when it was longer."""
    return f"{text[:limit]}..." if len(text) > limit else text


def _stat_card(value, label: str) -> str:
    """Render one summary card: a large escaped value above a fixed label."""
    return (
        '<div style="background: white; padding: 15px; border-radius: 8px; border: 1px solid #e1e5e9; text-align: center;">\n'
        f'<div style="font-size: 20px; font-weight: bold; color: #667eea;">{_esc(value)}</div>\n'
        f'<div style="font-size: 12px; color: #666; margin-top: 5px;">{label}</div>\n'
        '</div>'
    )


def _stat_grid(cards, *, bottom_margin: bool = True) -> str:
    """Wrap already-rendered cards in the responsive grid container."""
    margin = " margin-bottom: 20px;" if bottom_margin else ""
    return (
        '\n<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px;'
        + margin
        + '">\n'
        + "\n".join(cards)
        + "\n</div>\n"
    )


def _render_claims_extraction(claims_data: dict) -> str:
    """Render the collapsible list of extracted claims."""
    all_claims = claims_data.get("all_claims") or []
    core_claims = claims_data.get("core_claims") or []
    type_colors = {"core": "#e74c3c", "supporting": "#f39c12", "contextual": "#3498db"}
    parts = [f'''
<div style="background: white; padding: 20px; border-radius: 8px; border: 1px solid #e1e5e9; margin-bottom: 20px;">
<h6>🔍 Claims Extraction</h6>
<div style="margin-bottom: 15px;">
<strong>Total Claims Found:</strong> {len(all_claims)} | <strong>Core Claims:</strong> {len(core_claims)}
</div>
<details style="margin-bottom: 10px;">
<summary style="cursor: pointer; font-weight: bold;">View All Claims ({len(all_claims)})</summary>
<div style="margin-top: 10px; max-height: 300px; overflow-y: auto;">
''']
    for position, claim in enumerate(all_claims[:_MAX_CLAIMS_SHOWN], start=1):
        # The colour comes from a fixed table, so it is safe to embed as-is.
        color = type_colors.get(_as_text(claim.get("type", "contextual")), "#95a5a6")
        claim_id = _esc(claim.get("id", position))
        # Truncate the raw text first so an HTML entity is never cut in half.
        text = _esc(_truncate(_as_text(claim.get("text", "")), 200))
        kind = _esc(_as_text(claim.get("type", "unknown"), "unknown").title())
        score = _esc(claim.get("importance_score", 0))
        parts.append(f'''
<div style="padding: 10px; margin: 5px 0; border-left: 4px solid {color}; background: #f8f9fa;">
<strong>Claim {claim_id}:</strong> {text}<br>
<small style="color: #666;">Type: {kind} | Score: {score}</small>
</div>
''')
    hidden = len(all_claims) - _MAX_CLAIMS_SHOWN
    if hidden > 0:
        parts.append(
            f'<div style="text-align: center; color: #666; margin-top: 10px;">... and {hidden} more claims</div>'
        )
    parts.append('''
</div>
</details>
</div>
''')
    return "".join(parts)


def _render_anchoring(anchoring_data: dict) -> str:
    """Render the collapsible list of claims with their evidence status."""
    claims_with_evidence = anchoring_data.get("claims_with_evidence") or []
    status_colors = {"validated": "#27ae60", "partial": "#f39c12", "unsupported": "#e74c3c"}
    parts = [f'''
<div style="background: white; padding: 20px; border-radius: 8px; border: 1px solid #e1e5e9; margin-bottom: 20px;">
<h6>⚓ Claims Anchoring & Evidence</h6>
<details style="margin-bottom: 10px;">
<summary style="cursor: pointer; font-weight: bold;">View Anchoring Results ({len(claims_with_evidence)})</summary>
<div style="margin-top: 10px; max-height: 400px; overflow-y: auto;">
''']
    for entry in claims_with_evidence:
        status = _as_text(entry.get("validation_status", "unknown"), "unknown")
        color = status_colors.get(status, "#95a5a6")
        claim_id = _esc(entry.get("claim_id", ""))
        status_label = _esc(status.title())
        text = _esc(_truncate(_as_text(entry.get("claim_text", "")), 300))
        evidence_count = len(entry.get("supporting_evidence") or [])
        reference_count = len(entry.get("anchored_references") or [])
        quality = entry.get("quality_assessment")
        quality_html = (
            f'<div style="font-size: 12px; color: #666;"><strong>Quality Assessment:</strong> {_esc(quality)}</div>'
            if quality
            else ""
        )
        parts.append(f'''
<div style="padding: 15px; margin: 10px 0; border: 1px solid #e1e5e9; border-radius: 8px;">
<div style="display: flex; align-items: center; margin-bottom: 10px;">
<strong>Claim {claim_id}:</strong>
<span style="margin-left: 10px; padding: 4px 8px; background: {color}; color: white; border-radius: 4px; font-size: 12px;">
{status_label}
</span>
</div>
<div style="margin-bottom: 10px; color: #333;">
{text}
</div>
<div style="margin-bottom: 10px;">
<strong>Supporting Evidence:</strong> {evidence_count} passages found
</div>
<div style="margin-bottom: 10px;">
<strong>References:</strong> {reference_count} references anchored
</div>
{quality_html}
</div>
''')
    parts.append('''
</div>
</details>
</div>
''')
    return "".join(parts)


def _render_references(ref_data: dict) -> str:
    """Render the collapsible list of formatted references."""
    reference_details = ref_data.get("reference_details") or []
    status_colors = {"complete": "#27ae60", "incomplete": "#f39c12", "not_found": "#e74c3c"}
    parts = [f'''
<div style="background: white; padding: 20px; border-radius: 8px; border: 1px solid #e1e5e9; margin-bottom: 20px;">
<h6>📚 Formatted References</h6>
<details style="margin-bottom: 10px;">
<summary style="cursor: pointer; font-weight: bold;">View Formatted References ({len(reference_details)})</summary>
<div style="margin-top: 10px; max-height: 300px; overflow-y: auto;">
''']
    for ref in reference_details:
        completion = _as_text(ref.get("completion_status", "unknown"), "unknown")
        color = status_colors.get(completion, "#95a5a6")
        ref_id = _esc(ref.get("reference_id", ""))
        citation = _esc(ref.get("formatted_citation", ""))
        source_type = _esc(_as_text(ref.get("source_type", "unknown"), "unknown").title())
        completion_label = _esc(completion.title())
        parts.append(f'''
<div style="padding: 10px; margin: 5px 0; border-left: 4px solid {color}; background: #f8f9fa;">
<div style="font-weight: bold; margin-bottom: 5px;">Reference {ref_id}</div>
<div style="margin-bottom: 5px;">{citation}</div>
<small style="color: #666;">Type: {source_type} | Status: {completion_label}</small>
</div>
''')
    parts.append('''
</div>
</details>
</div>
''')
    return "".join(parts)


def _render_summary(summary_stats: dict) -> str:
    """Render the summary-statistics card grids."""
    parts = ['''
<div style="margin-bottom: 20px;">
<h5>📊 Summary Statistics</h5>
''']
    if "document_metadata" in summary_stats:
        metadata = summary_stats["document_metadata"]
        countries = metadata.get("countries")
        countries_text = ", ".join(map(str, countries)) if countries else "Not detected"
        parts.append(_stat_grid([
            _stat_card(metadata.get("product", "Not detected"), "Product"),
            _stat_card(metadata.get("language", "Not detected"), "Language"),
            _stat_card(countries_text, "Countries"),
        ]))
    if "claims_analysis" in summary_stats:
        claims = summary_stats["claims_analysis"]
        parts.append(_stat_grid([
            _stat_card(claims.get("total_claims", 0), "Total Claims"),
            _stat_card(claims.get("core_claims_count", 0), "Core Claims"),
        ]))
    anchoring = summary_stats.get("claims_anchoring") or {}
    if "summary" in anchoring:
        summary = anchoring["summary"]
        # validation_rate is multiplied by 100 and truncated to a whole percent.
        validation_rate = int((summary.get("validation_rate") or 0) * 100)
        parts.append(_stat_grid([
            _stat_card(summary.get("total_claims_processed", 0), "Claims Processed"),
            _stat_card(summary.get("successfully_validated", 0), "Successfully Validated"),
            _stat_card(f"{validation_rate}%", "Validation Rate"),
        ]))
    if "reference_formatting" in summary_stats:
        refs = summary_stats["reference_formatting"]
        parts.append(_stat_grid(
            [_stat_card(refs.get("total_references", 0), "References Formatted")],
            bottom_margin=False,
        ))
    parts.append('''
</div>
''')
    return "".join(parts)


def format_research_results(result: dict) -> str:
    """Format a research result dictionary as an HTML fragment.

    Two optional top-level keys are rendered, in this order:

    * ``detailed_analysis`` - sub-sections ``claims_extracted``,
      ``anchoring_results`` and ``formatted_references``.
    * ``summary_statistics`` - sub-sections ``document_metadata``,
      ``claims_analysis``, ``claims_anchoring`` (only when it has a
      ``summary`` key) and ``reference_formatting``.

    Every value taken from ``result`` is HTML-escaped before being embedded,
    because the content derives from user-submitted documents. ``None``
    values for text and list fields are tolerated.

    Args:
        result: The research result dictionary.

    Returns:
        The HTML fragment, or an empty string when neither top-level key
        holds any data.
    """
    summary_stats = result.get("summary_statistics", {})
    detailed_analysis = result.get("detailed_analysis", {})
    parts = []

    # Detailed analysis comes first: it is the priority content.
    if detailed_analysis:
        parts.append('''
<div style="margin-bottom: 30px;">
<h5>📋 Detailed Analysis Results</h5>
''')
        if "claims_extracted" in detailed_analysis:
            parts.append(_render_claims_extraction(detailed_analysis["claims_extracted"]))
        if "anchoring_results" in detailed_analysis:
            parts.append(_render_anchoring(detailed_analysis["anchoring_results"]))
        if "formatted_references" in detailed_analysis:
            parts.append(_render_references(detailed_analysis["formatted_references"]))
        parts.append('''
</div>
''')

    # Summary statistics are secondary information.
    if summary_stats:
        parts.append(_render_summary(summary_stats))

    return "".join(parts)
def create_openai_client():
    """Build an OpenAI client from the GEAI_* environment variables.

    ``GEAI_API_KEY`` and ``GEAI_API_BASE_URL`` are read with ``os.getenv``
    and passed straight to the ``OpenAI`` constructor; an unset variable is
    passed as ``None``.
    """
    return OpenAI(
        api_key=os.getenv("GEAI_API_KEY"),
        base_url=os.getenv("GEAI_API_BASE_URL"),
    )
def answer_question(question: str):
    """Answer a question with the configured LLM.

    Args:
        question: The user's question.

    Returns:
        A ``GenerateResponse`` whose ``text`` is the model's answer with
        surrounding whitespace stripped.

    Raises:
        HTTPException: 400 when the question is empty or whitespace-only;
            500 when ``GEAI_API_KEY`` is not set, when the LLM call fails,
            or when the model returns no text.
    """
    if not question or not question.strip():
        raise HTTPException(status_code=400, detail="Please provide a question.")

    # Fail early with a clear message instead of letting the client call fail.
    if not os.getenv("GEAI_API_KEY"):
        raise HTTPException(status_code=500, detail="GEAI API key not configured")

    try:
        client = create_openai_client()
        completion = client.chat.completions.create(
            model="openai/gpt-4o-mini",
            messages=[{"role": "user", "content": f"Answer the following question clearly and concisely: {question}"}],
            temperature=0.7,
            max_tokens=500
        )
        response = completion.choices[0].message.content
    except Exception as e:
        # Chain the cause so the original traceback is kept in the logs.
        raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}") from e

    # NOTE(review): the OpenAI SDK types message.content as optional; report
    # a missing answer explicitly rather than failing on None.strip().
    if response is None:
        raise HTTPException(status_code=500, detail="Error generating response: the model returned no text")
    return GenerateResponse(text=response.strip())
def get_base_html(active_section: str = "ai-generator", question_value: str = "", document_value: str = "", ai_response: str = "", research_response: str = ""):
    """Build the complete HTML page for the web interface.

    Args:
        active_section: ``"ai-generator"`` or ``"research-team"``; selects
            which form is visible and which sidebar link is highlighted.
        question_value: Text pre-filled in the question input. It is
            HTML-escaped here, so callers pass the raw user text.
        document_value: Text pre-filled in the document textarea. It is
            HTML-escaped here, so callers pass the raw user text.
        ai_response: HTML fragment placed below the question form. It is
            inserted verbatim, so it must already be safe markup.
        research_response: HTML fragment placed below the document form. It
            is inserted verbatim, so it must already be safe markup.

    Returns:
        The HTML document as a string.
    """
    ai_display = "" if active_section == "ai-generator" else "display: none;"
    research_display = "" if active_section == "research-team" else "display: none;"
    ai_nav_state = "active" if active_section == "ai-generator" else ""
    research_nav_state = "active" if active_section == "research-team" else ""
    # Both values echo user input back into the page: without escaping, a
    # quote could break out of the value attribute and "</textarea>" could
    # close the textarea early.
    safe_question = html.escape(question_value, quote=True)
    safe_document = html.escape(document_value)
    return f"""
<!DOCTYPE html>
<html>
<head>
<title>SciResearch API</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background-color: #f5f7fa;
color: #333;
}}
.app-container {{
display: flex;
min-height: 100vh;
}}
/* Sidebar Styles */
.sidebar {{
width: 300px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
box-shadow: 2px 0 10px rgba(0,0,0,0.1);
position: fixed;
height: 100vh;
overflow-y: auto;
}}
.sidebar h1 {{
font-size: 24px;
margin-bottom: 10px;
display: flex;
align-items: center;
gap: 10px;
}}
.sidebar p {{
margin-bottom: 30px;
opacity: 0.9;
font-size: 14px;
}}
.sidebar-section {{
margin-bottom: 30px;
}}
.sidebar-section h3 {{
font-size: 16px;
margin-bottom: 15px;
border-bottom: 1px solid rgba(255,255,255,0.3);
padding-bottom: 5px;
}}
.nav-link {{
display: block;
color: rgba(255,255,255,0.8);
text-decoration: none;
padding: 10px 15px;
margin: 5px 0;
border-radius: 8px;
transition: background 0.3s;
}}
.nav-link:hover, .nav-link.active {{
background: rgba(255,255,255,0.2);
color: white;
}}
/* Main Content Styles */
.main-content {{
flex: 1;
margin-left: 300px;
padding: 40px;
background: white;
min-height: 100vh;
}}
.content-header {{
margin-bottom: 30px;
}}
.content-header h2 {{
font-size: 28px;
color: #333;
margin-bottom: 10px;
}}
.content-header p {{
color: #666;
font-size: 16px;
}}
.generator-section {{
background: white;
border-radius: 12px;
padding: 30px;
box-shadow: 0 2px 20px rgba(0,0,0,0.08);
margin-bottom: 20px;
}}
.form-group {{
margin-bottom: 20px;
}}
.form-group label {{
display: block;
font-weight: 600;
margin-bottom: 8px;
color: #333;
}}
input[type="text"], textarea {{
width: 100%;
padding: 12px 16px;
border: 2px solid #e1e5e9;
border-radius: 8px;
font-size: 14px;
transition: border-color 0.3s, box-shadow 0.3s;
font-family: inherit;
}}
input[type="text"]:focus, textarea:focus {{
outline: none;
border-color: #667eea;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
}}
textarea {{
height: 200px;
resize: vertical;
}}
.btn {{
padding: 12px 24px;
border: none;
border-radius: 8px;
cursor: pointer;
font-size: 14px;
font-weight: 600;
transition: all 0.3s;
display: inline-flex;
align-items: center;
gap: 8px;
}}
.btn-primary {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}}
.btn-primary:hover {{
transform: translateY(-2px);
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4);
}}
.btn-secondary {{
background: linear-gradient(135deg, #36d1dc 0%, #5b86e5 100%);
color: white;
}}
.btn-secondary:hover {{
transform: translateY(-2px);
box-shadow: 0 4px 15px rgba(54, 209, 220, 0.4);
}}
.response-section {{
margin-top: 25px;
padding: 20px;
background: #f8f9fa;
border-radius: 8px;
border-left: 4px solid #667eea;
}}
.response-section.error {{
border-left-color: #dc3545;
}}
.response-section h4 {{
margin-bottom: 15px;
color: #333;
}}
.api-info {{
background: #f8f9fa;
padding: 20px;
border-radius: 8px;
margin-top: 30px;
}}
.api-info h3 {{
margin-bottom: 15px;
color: #333;
}}
.api-info ul {{
list-style: none;
}}
.api-info li {{
padding: 8px 0;
border-bottom: 1px solid #e1e5e9;
}}
.api-info li:last-child {{
border-bottom: none;
}}
.api-info a {{
color: #667eea;
text-decoration: none;
}}
.api-info a:hover {{
text-decoration: underline;
}}
/* Responsive Design */
@media (max-width: 768px) {{
.sidebar {{
width: 100%;
position: relative;
height: auto;
}}
.main-content {{
margin-left: 0;
padding: 20px;
}}
.app-container {{
flex-direction: column;
}}
}}
</style>
</head>
<body>
<div class="app-container">
<!-- Sidebar -->
<div class="sidebar">
<h1>🦀 SciResearch</h1>
<p>Scientific Research FastAPI application with AI integration and Research Team</p>
<div class="sidebar-section">
<h3>📡 Select Generator</h3>
<a href="/" class="nav-link {ai_nav_state}">💬 AI Question Generator</a>
<a href="/?mode=research" class="nav-link {research_nav_state}">📄 Research Team Processor</a>
</div>
<div class="sidebar-section">
<h3>📋 Quick Links</h3>
<a href="/docs" class="nav-link">📚 API Documentation</a>
<a href="/api/health" class="nav-link">🔧 Health Endpoint</a>
</div>
</div>
<!-- Main Content -->
<div class="main-content">
<!-- AI Generator Section -->
<div id="ai-generator-section" class="generator-section" style="{ai_display}">
<div class="content-header">
<h2>💬 AI Question Generator</h2>
<p>Ask questions and get AI-powered responses from the research assistant</p>
</div>
<form action="/ask" method="post">
<div class="form-group">
<label for="question">Enter your question:</label>
<input type="text" name="question" id="question"
value="{safe_question}"
placeholder="What would you like to know about scientific research?"
required />
</div>
<button type="submit" class="btn btn-primary">
🚀 Submit Question
</button>
</form>
{ai_response}
</div>
<!-- Research Team Section -->
<div id="research-team-section" class="generator-section" style="{research_display}">
<div class="content-header">
<h2>📄 Research Team Document Processor</h2>
<p>Process documents for claims anchoring and reference formatting using the AI research team</p>
</div>
<form action="/process" method="post">
<div class="form-group">
<label for="document">Paste your document content:</label>
<textarea name="document_content" id="document"
placeholder="Paste the content of your research document here..."
required>{safe_document}</textarea>
</div>
<button type="submit" class="btn btn-secondary">
🔬 Process Document
</button>
</form>
{research_response}
</div>
</div>
</div>
</body>
</html>
"""
@app.get("/", response_class=HTMLResponse)
def read_root(mode: str = None):
    """Serve the main HTML page.

    The ``mode`` query parameter selects the visible form: ``"research"``
    shows the research-team processor; any other value (or none) shows the
    AI question generator.
    """
    section = "research-team" if mode == "research" else "ai-generator"
    return get_base_html(section)
@app.get("/api/hello")
def greet_json():
    """Return a fixed greeting as JSON."""
    greeting = "¡Hola mundo desde SciResearch!"
    return {"message": greeting, "status": "success"}
@app.get("/api/health")
def health_check():
    """Report application health.

    ``geai_configured`` is true only when both ``GEAI_API_KEY`` and
    ``GEAI_API_BASE_URL`` are set to non-empty values; the remaining fields
    are constants.
    """
    required_settings = ("GEAI_API_KEY", "GEAI_API_BASE_URL")
    geai_configured = all(os.getenv(name) for name in required_settings)
    payload = {
        "status": "healthy",
        "service": "sciresearch",
        "version": "1.0.0",
        "geai_configured": geai_configured,
        "research_team_available": True,
    }
    return payload
@app.post("/api/generate", summary="Answer user questions using OpenAI", tags=["AI Generate"], response_model=GenerateResponse)
def inference(request: QuestionRequest):
    """Generate an answer to the posted question using the OpenAI LLM."""
    question = request.question
    return answer_question(question=question)
@app.post("/ask", response_class=HTMLResponse)
def ask_question_form(question: str = Form(...)):
    """Handle the question form and return the rendered HTML page.

    Failures never propagate: any exception is turned into an error panel
    on the returned page.

    Args:
        question: The ``question`` form field.

    Returns:
        The full HTML page containing either the answer or an error message.
    """
    try:
        result = answer_question(question)
        answer_text = result.text
        status = "success"
        error_msg = None
    except HTTPException as e:
        # Show the human-readable detail: depending on the Starlette version,
        # str() of an HTTPException is empty or prefixed with the status code.
        answer_text = ""
        status = "error"
        error_msg = str(e.detail)
    except Exception as e:
        answer_text = ""
        status = "error"
        error_msg = str(e)
    return get_html_with_response(question, answer_text, status, error_msg)
@app.post("/process", response_class=HTMLResponse)
def process_document_form(document_content: str = Form(...)):
    """Handle the document form and return the rendered HTML page.

    The research team's ``process_document`` awaitable is driven to
    completion on a private event loop, because this handler is synchronous.
    Failures never propagate: any exception is turned into an error panel on
    the returned page.

    Args:
        document_content: The ``document_content`` form field.

    Returns:
        The full HTML page containing either the results or an error message.
    """
    try:
        team = get_research_team()
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            result = loop.run_until_complete(team.process_document(document_content))
        finally:
            # Close the loop and detach it from this thread; otherwise every
            # request leaks a loop and leaves a stale one installed.
            asyncio.set_event_loop(None)
            loop.close()
        status = "success"
        error_msg = None
    except Exception as e:
        result = {}
        status = "error"
        error_msg = str(e)
    return get_html_with_research_response(document_content, result, status, error_msg)
@app.post("/api/research/process", summary="Process document with Research Team", tags=["Research Team"], response_model=ResearchResponse)
async def process_document_research(request: DocumentRequest):
    """Process a document with the Research Team (claims anchoring and reference formatting).

    Raises:
        HTTPException: 400 when the document content is empty or
            whitespace-only; 500 when processing fails for any reason.
    """
    document = request.document_content
    if not document or not document.strip():
        raise HTTPException(status_code=400, detail="Please provide document content.")
    try:
        # Fetch the shared team and run the document through it
        outcome = await get_research_team().process_document(document)
        return ResearchResponse(result=outcome)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing document: {str(e)}")
@app.get("/api/research/status")
def get_research_status():
    """Report whether the Research Team can be obtained.

    Returns a "ready" payload when ``get_research_team()`` succeeds and an
    "error" payload carrying the exception text when it raises.
    """
    try:
        get_research_team()
    except Exception as e:
        return {
            "status": "error",
            "error": str(e),
            "workflow_available": False,
        }
    agent_names = ("analyzer", "search_assistant", "researcher", "editor")
    return {
        "status": "ready",
        "workflow_available": True,
        "agents": {name: "ready" for name in agent_names},
    }