Spaces:
Running
Running
import json
import logging
import os
from typing import Dict, Any, List, Optional

from dotenv import load_dotenv
from openai import OpenAI

logger = logging.getLogger(__name__)

# Load variables from a local .env file into the process environment.
load_dotenv()

# Report only whether the key is present — never print the secret itself.
if os.getenv("OPENAI_API_KEY"):
    print("✅ OPENAI_API_KEY loaded")
else:
    print("⚠️ OPENAI_API_KEY not found in environment")
class LLMService:
    """Service for interacting with OpenAI LLM to process and consolidate scraped data"""

    def __init__(self, model_name: str = "gpt-4o"):
        """
        Initialize LLM service

        Args:
            model_name: Name of the OpenAI model to use (default: gpt-4o)
        """
        # the newest OpenAI model is "gpt-4o" which was released May 13, 2024.
        # do not change this unless explicitly requested by the user
        self.model_name = model_name
        self.api_key = os.environ.get("OPENAI_API_KEY")
        if not self.api_key:
            logger.warning("OpenAI API key not found in environment variables")
        # NOTE(review): the client is still constructed when the key is missing;
        # any later API call will then fail with an authentication error.
        self.client = OpenAI(api_key=self.api_key)

    # This method will be implemented in api/horoscope_routes.py
    def consolidate_horoscopes(self, horoscope_data):
        """Placeholder method for consolidating horoscopes"""
        return {"error": "Method not implemented"}

    def _chat(
        self,
        system_prompt: str,
        user_prompt: str,
        *,
        json_mode: bool = False,
        temperature: float = 0.2,
        max_tokens: Optional[int] = None,
    ) -> Optional[str]:
        """Send one chat-completion request and return the raw message content (may be None)."""
        kwargs: Dict[str, Any] = {
            "model": self.model_name,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": temperature,
        }
        if json_mode:
            # Forces the model to emit a single JSON object.
            kwargs["response_format"] = {"type": "json_object"}
        if max_tokens is not None:
            kwargs["max_tokens"] = max_tokens
        response = self.client.chat.completions.create(**kwargs)
        return response.choices[0].message.content

    @staticmethod
    def _parse_json_response(content: Optional[str]) -> Dict[str, Any]:
        """Decode a JSON response body, mapping an empty body to an error dict."""
        if content:
            return json.loads(content)
        return {"error": "Empty response from LLM"}

    def consolidate_data(self, scraped_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Consolidate data from multiple sources using LLM

        Args:
            scraped_data: List of scraped data from different sources

        Returns:
            Consolidated information as a dictionary, or an {"error": ...} dict
            on empty input or failure.
        """
        if not scraped_data:
            return {"error": "No data provided for consolidation"}
        try:
            # Prepare data for LLM: one labelled section per source, with
            # content truncated to 2000 chars each to bound the prompt size.
            sources_text = ""
            for i, data in enumerate(scraped_data, 1):
                source_type = data.get("type", "unknown")
                title = data.get("title", "Unknown Title")
                source = data.get("source", "Unknown Source")
                text = data.get("text_content", "No content available")
                sources_text += f"SOURCE {i} ({source_type} from {source}):\n"
                sources_text += f"Title: {title}\n"
                sources_text += f"Content: {text[:2000]}...\n\n"
            # Create prompt for consolidation
            prompt = f"""
Please analyze and consolidate the following information from multiple sources.

{sources_text}

Provide a comprehensive consolidation of this information in JSON format with the following structure:
{{
    "main_topics": [list of main topics covered],
    "key_points": [list of key factual points from all sources],
    "summary": "A 2-3 paragraph summary that synthesizes the information",
    "analysis": "Brief analysis of the information and any discrepancies between sources",
    "sources": [list of sources used]
}}

Only include factual information present in the sources. Do not add any speculative or additional information.
"""
            content = self._chat(
                "You are a data analysis expert specializing in consolidating information from multiple sources.",
                prompt,
                json_mode=True,
                temperature=0.2,
            )
            return self._parse_json_response(content)
        except Exception as e:
            logger.error(f"Error consolidating data with LLM: {str(e)}")
            return {"error": f"Failed to consolidate data: {str(e)}"}

    def summarize_content(self, text: str, max_length: int = 500) -> str:
        """
        Summarize a single piece of content

        Args:
            text: Text to summarize
            max_length: Maximum length of summary in characters

        Returns:
            Summarized text, or an explanatory message on empty input/failure.
        """
        if not text:
            return "No content to summarize"
        try:
            prompt = f"""
Please summarize the following text concisely in no more than {max_length} characters,
while maintaining all key information:

{text[:10000]}
"""
            content = self._chat(
                "You are a summarization expert.",
                prompt,
                temperature=0.3,
                max_tokens=max_length // 2,  # Approximate token count
            )
            # Bug fix: the API may return None content; this method is
            # annotated -> str, so never propagate None to callers.
            return content if content is not None else "Failed to summarize content: empty response"
        except Exception as e:
            logger.error(f"Error summarizing content with LLM: {str(e)}")
            return f"Failed to summarize content: {str(e)}"

    def extract_key_information(self, text: str, info_type: Optional[str] = None) -> Dict[str, Any]:
        """
        Extract specific type of information from content

        Args:
            text: Text to extract information from
            info_type: Type of information to extract (e.g., "news", "product", "research")

        Returns:
            Extracted information as dictionary, or an {"error": ...} dict.
        """
        if not text:
            return {"error": "No content provided"}
        try:
            type_instruction = f"This is {info_type} content. " if info_type else ""
            prompt = f"""
{type_instruction}Please extract key structured information from the following text.
Return the result as a JSON object with appropriate fields based on the content type.

{text[:8000]}
"""
            content = self._chat(
                "You are a data extraction expert.",
                prompt,
                json_mode=True,
                temperature=0.1,
            )
            return self._parse_json_response(content)
        except Exception as e:
            logger.error(f"Error extracting information with LLM: {str(e)}")
            return {"error": f"Failed to extract information: {str(e)}"}
# Create a singleton instance shared by the rest of the application.
# NOTE: this runs at import time and constructs the OpenAI client immediately,
# so importing this module requires the environment to be set up first.
llm_service = LLMService()