Spaces:
Sleeping
Sleeping
import asyncio
import io
import json
import logging
import os
from datetime import datetime
from typing import Any, Dict, List

from fastapi import BackgroundTasks, FastAPI, Request
from openai import OpenAI
from supabase import Client, create_client
# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

# OpenAI SDK client, used to upload batch input files and create batch jobs.
# The class imported from `openai` is `OpenAI`; the previous `Client(...)`
# call referenced a name that was never defined.
client = OpenAI(
    api_key=os.getenv('OPENAI_API_KEY'),
    organization=os.getenv('ORG_ID'),
)

# Supabase client, used to record batch job status.
# NOTE(review): os.getenv returns None when a variable is unset; confirm that
# SUPABASE_URL / SUPABASE_KEY are always configured in the deployment.
url: str = os.getenv('SUPABASE_URL')
key: str = os.getenv('SUPABASE_KEY')
supabase: Client = create_client(url, key)
# System prompt sent with every movie description in the batch.
_MOVIE_SYSTEM_PROMPT = '''
Your goal is to extract movie categories from movie descriptions, as well as a 1-sentence summary for these movies.
You will be provided with a movie description, and you will output a json object containing the following information:
{
categories: string[] // Array of categories based on the movie description,
summary: string // 1-sentence summary of the movie based on the movie description
}
Categories refer to the genre or type of the movie, like "action", "romance", "comedy", etc. Keep category names simple and use only lower case letters.
Movies can have several categories, but try to keep it under 3-4. Only mention the categories that are the most obvious based on the description.
'''


def _build_openai_tasks(records):
    """Build one chat-completion batch request per usable record.

    Records without an ``imdb_id`` or with an empty ``Description`` are
    skipped (with a warning), since they would yield a request with no
    usable ``custom_id`` or no user message content.
    """
    tasks = []
    for record in records:
        imdb_id = record.get('imdb_id')
        description = record.get('Description')
        if imdb_id is None or not description:
            logger.warning(
                "Skipping record without imdb_id/Description: %r", imdb_id
            )
            continue
        tasks.append({
            "custom_id": f"task-{imdb_id}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "gpt-4o-mini",
                "temperature": 0.1,
                "response_format": {
                    "type": "json_object"
                },
                "messages": [
                    {
                        "role": "system",
                        "content": _MOVIE_SYSTEM_PROMPT
                    },
                    {
                        "role": "user",
                        "content": description
                    }
                ]
            }
        })
    return tasks


async def process_batch_job(dataset: Dict[str, Any], batch_job_id: str):
    """
    Background task to process the batch job.

    Builds one chat-completion request per record in ``dataset['data']``,
    uploads them to OpenAI as a JSONL batch input file, creates a batch job
    with a 24h completion window, and records the outcome in the
    ``batch_processing_details`` Supabase table.

    Args:
        dataset: Request payload; records are read from its ``'data'`` key,
            each providing ``'imdb_id'`` and ``'Description'``.
        batch_job_id: Identifier of the row in ``batch_processing_details``
            to update.

    Any exception is caught, logged, and written to the row's ``error``
    column; nothing is re-raised to the caller.

    NOTE(review): the OpenAI and Supabase calls below are synchronous, so
    they run on the event loop while this coroutine executes. Consider
    offloading them if this becomes a bottleneck.
    """
    try:
        logger.info(f"Starting batch processing for job {batch_job_id}")

        # Tolerate a missing/None 'data' key; fail with a clear message
        # instead of "'NoneType' object is not iterable".
        records = dataset.get('data') or []
        openai_tasks = _build_openai_tasks(records)
        if not openai_tasks:
            raise ValueError("dataset contains no usable records under 'data'")

        # Create batch file: one JSON object per line (JSONL). Passing a
        # (filename, bytes) tuple gives the upload an explicit .jsonl name and
        # avoids handing over a BytesIO whose cursor sits at end-of-stream.
        payload = "".join(
            json.dumps(task) + '\n' for task in openai_tasks
        ).encode('utf-8')
        batch_file = client.files.create(
            file=("batch_input.jsonl", payload),
            purpose="batch"
        )

        # Create batch job
        batch_job = client.batches.create(
            input_file_id=batch_file.id,
            endpoint="/v1/chat/completions",
            completion_window="24h"
        )
        # The OpenAI batch id is required to poll for results later; it was
        # previously discarded.
        # NOTE(review): consider persisting it alongside batch_job_id.
        logger.info(
            "Created OpenAI batch %s for job %s", batch_job.id, batch_job_id
        )

        # Update status in Supabase.
        # NOTE(review): at this point the batch has only been submitted, not
        # completed by OpenAI; confirm what `completed_at` should mean here.
        supabase.table("batch_processing_details").update({
            "batch_job_status": True,
            "completed_at": datetime.utcnow().isoformat()
        }).match({"batch_job_id": batch_job_id}).execute()
        logger.info(f"Batch job {batch_job_id} processed successfully")
    except Exception as e:
        logger.exception(f"Error processing batch job {batch_job_id}: {str(e)}")
        # Update status with error. Uses `.match({...})` like the success
        # path; `.eq` takes (column, value), not a dict. A failure of this
        # write is logged and not allowed to escape the background task.
        try:
            supabase.table("batch_processing_details").update({
                "batch_job_status": False,
                "error": str(e),
                "completed_at": datetime.utcnow().isoformat()
            }).match({"batch_job_id": batch_job_id}).execute()
        except Exception:
            logger.exception(
                f"Could not record failure for batch job {batch_job_id}"
            )
# NOTE(review): no route decorator (e.g. @app.post(...)) is visible on this
# handler, so as shown it is never registered on `app`. Confirm the intended
# path and HTTP method.
async def testv1(request: Request, background_tasks: BackgroundTasks):
    """
    Schedule a batch job for the JSON dataset in the request body.

    Inserts an initial row (status False) into ``batch_processing_details``,
    queues ``process_batch_job`` as a background task, and returns the
    generated ``batch_job_id``.

    Returns:
        ``{'data': 'Batch job is scheduled!', 'batch_job_id': ...}`` on
        success, or ``{'error': <message>}`` if parsing the body or the
        insert fails.
    """
    try:
        dataset = await request.json()

        # Take one timestamp so the id and created_at agree.
        # NOTE(review): the id has one-second resolution, so two requests in
        # the same second get the same batch_job_id.
        now = datetime.utcnow()
        batch_job_id = f"batch_{now.strftime('%Y%m%d_%H%M%S')}"

        # Create initial batch job record
        save_data = {
            'batch_job_id': batch_job_id,
            "batch_job_status": False,
            "created_at": now.isoformat()
        }
        supabase.table("batch_processing_details").insert(save_data).execute()

        # Add processing to background tasks
        background_tasks.add_task(process_batch_job, dataset, batch_job_id)

        # Return a dict: the previous trailing comma made this a 1-tuple,
        # which serialises as a JSON array wrapping the object.
        return {'data': 'Batch job is scheduled!', 'batch_job_id': batch_job_id}
    except Exception as e:
        logger.exception("Failed to schedule batch job")
        return {'error': str(e)}