Spaces:
Running
Running
import json
import logging
import os
from typing import Dict, Any, List, Optional

from dotenv import load_dotenv
from openai import OpenAI

logger = logging.getLogger(__name__)

# Load variables from a local .env file into the process environment.
load_dotenv()

# Report only whether the key is present — never print the secret itself.
if os.getenv("OPENAI_API_KEY"):
    print("✅ OPENAI_API_KEY loaded")
else:
    print("⚠️ OPENAI_API_KEY not found in environment")
class LLMService:
    """Service for interacting with OpenAI LLM to process and consolidate scraped data"""

    def __init__(self, model_name: str = "gpt-4o"):
        """
        Initialize LLM service

        Args:
            model_name: Name of the OpenAI model to use (default: gpt-4o)
        """
        # the newest OpenAI model is "gpt-4o" which was released May 13, 2024.
        # do not change this unless explicitly requested by the user
        self.model_name = model_name
        self.api_key = os.environ.get("OPENAI_API_KEY")
        if not self.api_key:
            logger.warning("OpenAI API key not found in environment variables")
        # NOTE(review): the client is still constructed when the key is missing;
        # any later API call will then fail with an authentication error.
        self.client = OpenAI(api_key=self.api_key)

    # This method will be implemented in api/horoscope_routes.py
    def consolidate_horoscopes(self, horoscope_data):
        """Placeholder method for consolidating horoscopes"""
        return {"error": "Method not implemented"}

    def _chat(
        self,
        system_prompt: str,
        user_prompt: str,
        *,
        json_mode: bool = False,
        temperature: float = 0.2,
        max_tokens: Optional[int] = None,
    ) -> Optional[str]:
        """Send one chat-completion request and return the raw message content (may be None)."""
        kwargs: Dict[str, Any] = {
            "model": self.model_name,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": temperature,
        }
        if json_mode:
            # Forces the model to emit a single JSON object.
            kwargs["response_format"] = {"type": "json_object"}
        if max_tokens is not None:
            kwargs["max_tokens"] = max_tokens
        response = self.client.chat.completions.create(**kwargs)
        return response.choices[0].message.content

    @staticmethod
    def _parse_json_response(content: Optional[str]) -> Dict[str, Any]:
        """Decode a JSON response body, mapping an empty body to an error dict."""
        if content:
            return json.loads(content)
        return {"error": "Empty response from LLM"}

    def consolidate_data(self, scraped_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Consolidate data from multiple sources using LLM

        Args:
            scraped_data: List of scraped data from different sources

        Returns:
            Consolidated information as a dictionary, or an {"error": ...} dict
            on empty input or failure.
        """
        if not scraped_data:
            return {"error": "No data provided for consolidation"}
        try:
            # Prepare data for LLM: one labelled section per source, with
            # content truncated to 2000 chars each to bound the prompt size.
            sources_text = ""
            for i, data in enumerate(scraped_data, 1):
                source_type = data.get("type", "unknown")
                title = data.get("title", "Unknown Title")
                source = data.get("source", "Unknown Source")
                text = data.get("text_content", "No content available")
                sources_text += f"SOURCE {i} ({source_type} from {source}):\n"
                sources_text += f"Title: {title}\n"
                sources_text += f"Content: {text[:2000]}...\n\n"
            # Create prompt for consolidation
            prompt = f"""
Please analyze and consolidate the following information from multiple sources.

{sources_text}

Provide a comprehensive consolidation of this information in JSON format with the following structure:
{{
    "main_topics": [list of main topics covered],
    "key_points": [list of key factual points from all sources],
    "summary": "A 2-3 paragraph summary that synthesizes the information",
    "analysis": "Brief analysis of the information and any discrepancies between sources",
    "sources": [list of sources used]
}}

Only include factual information present in the sources. Do not add any speculative or additional information.
"""
            content = self._chat(
                "You are a data analysis expert specializing in consolidating information from multiple sources.",
                prompt,
                json_mode=True,
                temperature=0.2,
            )
            return self._parse_json_response(content)
        except Exception as e:
            logger.error(f"Error consolidating data with LLM: {str(e)}")
            return {"error": f"Failed to consolidate data: {str(e)}"}

    def summarize_content(self, text: str, max_length: int = 500) -> str:
        """
        Summarize a single piece of content

        Args:
            text: Text to summarize
            max_length: Maximum length of summary in characters

        Returns:
            Summarized text, or an explanatory message on empty input/failure.
        """
        if not text:
            return "No content to summarize"
        try:
            prompt = f"""
Please summarize the following text concisely in no more than {max_length} characters,
while maintaining all key information:

{text[:10000]}
"""
            content = self._chat(
                "You are a summarization expert.",
                prompt,
                temperature=0.3,
                max_tokens=max_length // 2,  # Approximate token count
            )
            # Bug fix: the API may return None content; this method is
            # annotated -> str, so never propagate None to callers.
            return content if content is not None else "Failed to summarize content: empty response"
        except Exception as e:
            logger.error(f"Error summarizing content with LLM: {str(e)}")
            return f"Failed to summarize content: {str(e)}"

    def extract_key_information(self, text: str, info_type: Optional[str] = None) -> Dict[str, Any]:
        """
        Extract specific type of information from content

        Args:
            text: Text to extract information from
            info_type: Type of information to extract (e.g., "news", "product", "research")

        Returns:
            Extracted information as dictionary, or an {"error": ...} dict.
        """
        if not text:
            return {"error": "No content provided"}
        try:
            type_instruction = f"This is {info_type} content. " if info_type else ""
            prompt = f"""
{type_instruction}Please extract key structured information from the following text.
Return the result as a JSON object with appropriate fields based on the content type.

{text[:8000]}
"""
            content = self._chat(
                "You are a data extraction expert.",
                prompt,
                json_mode=True,
                temperature=0.1,
            )
            return self._parse_json_response(content)
        except Exception as e:
            logger.error(f"Error extracting information with LLM: {str(e)}")
            return {"error": f"Failed to extract information: {str(e)}"}
# Create a singleton instance shared by the rest of the application.
# NOTE: this runs at import time and constructs the OpenAI client immediately,
# so importing this module requires the environment to be set up first.
llm_service = LLMService()