Spaces:

Omkar008
/

batch_processing_openai

Sleeping

App Files Files Community

batch_processing_openai / main.py

Omkar008

Update main.py

9f88b7d verified over 1 year ago

raw

history blame

4.7 kB

	import asyncio
	import io
	import json
	import logging
	import os
	import uuid
	from datetime import datetime
	from typing import List, Dict, Any

	from fastapi import FastAPI, Request, BackgroundTasks
	from openai import OpenAI
	from supabase import Client, create_client



	# Initialize logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	app = FastAPI()

	# OpenAI client used for file upload and batch creation. Credentials are read
	# from the environment (OPENAI_API_KEY, ORG_ID).
	# The original called an undefined `Client(...)` here; the file imports
	# `OpenAI`, so that constructor is used.
	client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'), organization=os.getenv('ORG_ID'))

	# Supabase client used to persist batch job status rows
	# (SUPABASE_URL / SUPABASE_KEY come from the environment).
	url: str = os.getenv('SUPABASE_URL')
	key: str = os.getenv('SUPABASE_KEY')
	supabase: Client = create_client(url, key)

	async def process_batch_job(dataset: Dict[str, Any], batch_job_id: str):
	    """
	    Background task to process the batch job.

	    Builds one chat-completion request per record in ``dataset['data']``,
	    uploads them as a JSONL file, creates an OpenAI batch from that file, and
	    then updates the matching row in the ``batch_processing_details`` table.

	    Args:
	        dataset: Parsed request payload; records are read from its ``data``
	            key, each providing ``imdb_id`` and ``Description``.
	        batch_job_id: Identifier of the ``batch_processing_details`` row to
	            update.

	    Any exception is caught, logged, and written to the row's ``error``
	    column; nothing is re-raised to the caller.

	    NOTE(review): ``batch_job_status`` is set to True as soon as the OpenAI
	    batch has been created (completion window is 24h), and the OpenAI batch id
	    is only logged, not stored in Supabase.
	    NOTE(review): the OpenAI and Supabase clients used here are synchronous,
	    so these calls block while they run inside this coroutine.
	    """
	    try:
	        logger.info(f"Starting batch processing for job {batch_job_id}")

	        system_prompt = '''
	    Your goal is to extract movie categories from movie descriptions, as well as a 1-sentence summary for these movies.
	    You will be provided with a movie description, and you will output a json object containing the following information:

	    {
	    categories: string[] // Array of categories based on the movie description,
	    summary: string // 1-sentence summary of the movie based on the movie description
	    }

	    Categories refer to the genre or type of the movie, like "action", "romance", "comedy", etc. Keep category names simple and use only lower case letters.
	    Movies can have several categories, but try to keep it under 3-4. Only mention the categories that are the most obvious based on the description.
	    '''

	        records = dataset.get('data')
	        if not records:
	            # Fail with a clear message instead of "'NoneType' is not iterable".
	            raise ValueError("Request payload contains no 'data' records to process")

	        openai_tasks = _build_batch_requests(records, system_prompt)
	        if not openai_tasks:
	            raise ValueError("No records with both 'imdb_id' and 'Description' were found")

	        # Create batch file (JSONL: one request per line). Constructing the
	        # BytesIO from the full payload leaves the read position at 0; the
	        # original wrote into the buffer and uploaded it without rewinding,
	        # so the upload started reading at end-of-buffer.
	        payload = "".join(json.dumps(task) + '\n' for task in openai_tasks)
	        json_obj = io.BytesIO(payload.encode('utf-8'))

	        batch_file = client.files.create(
	            file=json_obj,
	            purpose="batch"
	        )

	        # Create batch job
	        batch_job = client.batches.create(
	            input_file_id=batch_file.id,
	            endpoint="/v1/chat/completions",
	            completion_window="24h"
	        )
	        logger.info(f"OpenAI batch {batch_job.id} created for job {batch_job_id}")

	        # Update status in Supabase
	        supabase.table("batch_processing_details").update({
	            "batch_job_status": True,
	            "completed_at": datetime.utcnow().isoformat()
	        }).match({"batch_job_id": batch_job_id}).execute()

	        logger.info(f"Batch job {batch_job_id} processed successfully")

	    except Exception as e:
	        logger.error(f"Error processing batch job {batch_job_id}: {str(e)}")
	        # Update status with error. `eq` takes (column, value); the original
	        # passed a dict, which made the error path itself raise.
	        try:
	            supabase.table("batch_processing_details").update({
	                "batch_job_status": False,
	                "error": str(e),
	                "completed_at": datetime.utcnow().isoformat()
	            }).eq("batch_job_id", batch_job_id).execute()
	        except Exception:
	            # Nothing awaits this background task, so log instead of raising.
	            logger.exception(f"Could not record error status for batch job {batch_job_id}")


	def _build_batch_requests(records, system_prompt):
	    """Return one Batch API chat-completion request dict per usable record.

	    Records lacking ``imdb_id`` or ``Description`` are skipped with a warning,
	    since they would yield a ``task-None`` custom id or an empty user message.
	    """
	    requests = []
	    for record in records:
	        imdb_id = record.get('imdb_id')
	        description = record.get('Description')
	        if imdb_id is None or not description:
	            logger.warning(f"Skipping record without imdb_id/Description: {imdb_id}")
	            continue
	        requests.append({
	            "custom_id": f"task-{imdb_id}",
	            "method": "POST",
	            "url": "/v1/chat/completions",
	            "body": {
	                "model": "gpt-4o-mini",
	                "temperature": 0.1,
	                "response_format": {
	                    "type": "json_object"
	                },
	                "messages": [
	                    {
	                        "role": "system",
	                        "content": system_prompt
	                    },
	                    {
	                        "role": "user",
	                        "content": description
	                    }
	                ]
	            }
	        })
	    return requests

	@app.post("/test/v1")
	async def testv1(request: Request, background_tasks: BackgroundTasks):
	    """
	    Register a new batch job and schedule its processing in the background.

	    Reads the JSON request body, inserts a row into
	    ``batch_processing_details`` with ``batch_job_status`` False, and queues
	    ``process_batch_job`` with the body and the generated job id.

	    Returns:
	        ``{'data': ..., 'batch_job_id': ...}`` on success, or
	        ``{'error': <message>}`` if anything fails while scheduling.
	    """
	    try:
	        dataset = await request.json()
	        if not isinstance(dataset, dict):
	            # process_batch_job reads dataset['data'], so other JSON shapes
	            # cannot be processed; reject them before creating a job row.
	            return {'error': "Request body must be a JSON object with a 'data' list"}

	        # One timestamp for both the id and created_at so they agree.
	        now = datetime.utcnow()
	        # The timestamp has one-second resolution; the random suffix keeps ids
	        # unique when several requests arrive within the same second.
	        batch_job_id = f"batch_{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"

	        # Create initial batch job record
	        save_data = {
	            'batch_job_id': batch_job_id,
	            "batch_job_status": False,
	            "created_at": now.isoformat()
	        }
	        supabase.table("batch_processing_details").insert(save_data).execute()

	        # Add processing to background tasks
	        background_tasks.add_task(process_batch_job, dataset, batch_job_id)

	        # The original had a trailing comma here, which made the return value
	        # a 1-tuple and the response body a JSON array.
	        return {'data': 'Batch job is scheduled!', 'batch_job_id': batch_job_id}

	    except Exception as e:
	        # Log with traceback; the original returned the error without logging.
	        logger.exception("Failed to schedule batch job")
	        return {'error': str(e)}