Spaces:

droov
/

polycorr-backend

Sleeping

dhruv575

Dependency Issues

35ead53 5 months ago

8.74 kB

	"""
	LLM Service for Asset Correlation Analysis
	Uses OpenAI API to identify correlated and inversely correlated assets
	"""

	import json
	import os
	import re
	from typing import Dict, List, Optional

	from openai import OpenAI


	class LLMService:
	    """Service to interact with LLM for asset correlation analysis.

	    Wraps an OpenAI chat-completions client. ``get_correlated_assets`` asks
	    the model for tickers related to a prediction-market question and
	    validates the reply; ``validate_tickers`` applies the same ticker format
	    check to an arbitrary list of symbols.
	    """

	    # Single ticker grammar shared by every validation path: 1-5 uppercase
	    # letters, optionally followed by one dot and a 1-3 letter suffix
	    # (e.g. BRK.B). Used with fullmatch so a trailing newline is rejected.
	    _TICKER_RE = re.compile(r'[A-Z]{1,5}(\.[A-Z]{1,3})?')

	    # Inclusive bounds on how many assets the model must return per list.
	    _MIN_ASSETS = 2
	    _MAX_ASSETS = 5

	    # (JSON key, human-readable label used in error messages)
	    _ASSET_LISTS = (
	        ("correlated", "correlated"),
	        ("inversely_correlated", "inversely correlated"),
	    )

	    _SYSTEM_PROMPT = """You are a financial analyst expert. Given a prediction market question,
	identify real, publicly traded assets (stocks or ETFs) that would likely be correlated or inversely
	correlated with the outcome of that question.

	You MUST follow all of these rules strictly:
	- Only use ticker symbols for publicly traded stocks or ETFs
	- Tickers must be 1–5 UPPERCASE letters (optionally with a single dot and suffix, like BRK.B)
	- Do NOT invent tickers or use names of companies, organizations, or concepts as tickers (e.g. OPENAI, BITCOIN, TESLA INC)
	- Do NOT use crypto symbols, indices (like SPX), or OTC/pink sheet symbols
	- Prefer highly liquid US-listed stocks and ETFs that are available on Yahoo Finance

	Return ONLY valid JSON in exactly this format:
	{
	"correlated": [
	{"ticker": "TICKER_SYMBOL", "reason": "One sentence explanation"},
	{"ticker": "TICKER_SYMBOL", "reason": "One sentence explanation"}
	],
	"inversely_correlated": [
	{"ticker": "TICKER_SYMBOL", "reason": "One sentence explanation"},
	{"ticker": "TICKER_SYMBOL", "reason": "One sentence explanation"}
	]
	}

	Requirements:
	- Provide 2-5 correlated assets
	- Provide 2-5 inversely correlated assets
	- Use valid ticker symbols (stocks or ETFs) that can be found on Yahoo Finance
	- Each ticker MUST obey the 1–5 uppercase letter rule (with optional single dot suffix)
	- Keep reasons to one clear sentence
	- Only return valid JSON, no additional text"""

	    def __init__(self, api_key: Optional[str] = None):
	        """
	        Initialize LLM service

	        Args:
	            api_key: OpenAI API key (if not provided, will use OPENAI_API_KEY env var)

	        Raises:
	            ValueError: If no API key is available, or if the OpenAI client
	                cannot be constructed even with minimal arguments.
	        """
	        self.api_key = api_key or os.getenv('OPENAI_API_KEY')

	        if not self.api_key:
	            raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")

	        try:
	            # Preferred construction: explicit request timeout and retry count.
	            self.client = OpenAI(
	                api_key=self.api_key,
	                timeout=60.0,
	                max_retries=2,
	            )
	        except (TypeError, AttributeError):
	            # Fallback: if the installed client rejects the extra keyword
	            # arguments (version conflict), retry with the API key only.
	            try:
	                self.client = OpenAI(api_key=self.api_key)
	            except Exception as e2:
	                raise ValueError(
	                    f"Failed to initialize OpenAI client. "
	                    f"This may be due to a version conflict. Error: {str(e2)}"
	                ) from e2

	        # Chat model used for every request made by this service.
	        self.model = "gpt-4o-mini"

	    def get_correlated_assets(self, market_question: str) -> Dict:
	        """
	        Get correlated and inversely correlated assets for a market question

	        Args:
	            market_question: The Polymarket question to analyze

	        Returns:
	            Dictionary containing only the assets whose ticker passed format
	            validation (tickers are stripped and upper-cased):
	            {
	                "correlated": [
	                    {"ticker": "AAPL", "reason": "..."},
	                    ...
	                ],
	                "inversely_correlated": [
	                    {"ticker": "GLD", "reason": "..."},
	                    ...
	                ]
	            }

	        Raises:
	            Exception: For any failure (API error, empty or unparseable reply,
	                wrong structure, too few valid tickers). The message is
	                prefixed with "Error calling OpenAI API: " and the original
	                error is attached as ``__cause__``.
	        """
	        user_prompt = f"""Market Question: "{market_question}"

	Identify correlated and inversely correlated assets for this prediction market."""

	        try:
	            response = self.client.chat.completions.create(
	                model=self.model,
	                messages=[
	                    {"role": "system", "content": self._SYSTEM_PROMPT},
	                    {"role": "user", "content": user_prompt},
	                ],
	                temperature=0.7,
	                max_tokens=2000,
	            )
	            return self._parse_assets(response.choices[0].message.content)
	        except Exception as e:
	            # Exception type and message prefix are kept for existing
	            # callers; chaining preserves the underlying traceback.
	            raise Exception(f"Error calling OpenAI API: {str(e)}") from e

	    @staticmethod
	    def _extract_json(content: str):
	        """Decode *content* as JSON, falling back to the outermost {...} span."""
	        try:
	            return json.loads(content)
	        except json.JSONDecodeError:
	            # The model sometimes wraps the object in prose or code fences.
	            json_match = re.search(r'\{.*\}', content, re.DOTALL)
	            if json_match is None:
	                raise ValueError("Could not parse JSON from LLM response") from None
	            try:
	                return json.loads(json_match.group())
	            except json.JSONDecodeError as exc:
	                raise ValueError("Could not parse JSON from LLM response") from exc

	    @classmethod
	    def _is_plausible_ticker(cls, ticker) -> bool:
	        """Return True when *ticker* is a string matching the ticker grammar."""
	        return isinstance(ticker, str) and cls._TICKER_RE.fullmatch(ticker) is not None

	    @classmethod
	    def _parse_assets(cls, content) -> Dict:
	        """Parse, validate and filter the raw text reply of the model."""
	        # message.content may be None (e.g. no text returned); fail clearly
	        # instead of crashing on .strip().
	        if not isinstance(content, str) or not content.strip():
	            raise ValueError("LLM returned an empty response")

	        result = cls._extract_json(content.strip())

	        # Validate structure: a JSON object holding both lists.
	        if not isinstance(result, dict):
	            raise ValueError("Invalid response structure from LLM")
	        for key, _ in cls._ASSET_LISTS:
	            if not isinstance(result.get(key), list):
	                raise ValueError("Invalid response structure from LLM")

	        # Validate that we have the right number of assets
	        for key, label in cls._ASSET_LISTS:
	            count = len(result[key])
	            if not (cls._MIN_ASSETS <= count <= cls._MAX_ASSETS):
	                raise ValueError(
	                    f"Expected {cls._MIN_ASSETS}-{cls._MAX_ASSETS} {label} assets, got {count}"
	                )

	        # Validate structure of each asset
	        for key, _ in cls._ASSET_LISTS:
	            for asset in result[key]:
	                if not isinstance(asset, dict) or 'ticker' not in asset or 'reason' not in asset:
	                    raise ValueError("Each asset must have 'ticker' and 'reason' fields")

	        # Filter out clearly invalid tickers
	        filtered = {key: [] for key, _ in cls._ASSET_LISTS}
	        invalid_tickers = []

	        for key, _ in cls._ASSET_LISTS:
	            for asset in result[key]:
	                raw = asset["ticker"]
	                # Non-string tickers are reported as invalid, not a crash.
	                ticker = raw.strip().upper() if isinstance(raw, str) else str(raw)
	                if cls._is_plausible_ticker(ticker):
	                    filtered[key].append({**asset, "ticker": ticker})
	                else:
	                    invalid_tickers.append(ticker)

	        # Ensure we still have enough assets after filtering
	        if any(len(filtered[key]) < cls._MIN_ASSETS for key, _ in cls._ASSET_LISTS):
	            raise ValueError(
	                f"LLM returned insufficient valid tickers after validation. "
	                f"Invalid tickers: {sorted(set(invalid_tickers))}"
	            )

	        return filtered

	    def validate_tickers(self, tickers: List[str]) -> Dict[str, bool]:
	        """
	        Validate that ticker symbols are reasonable

	        Uses the same format rule as get_correlated_assets: 1-5 uppercase
	        letters with an optional dotted 1-3 letter suffix (e.g. BRK.B). The
	        symbols are checked exactly as given (no stripping or upper-casing).

	        Args:
	            tickers: List of ticker symbols

	        Returns:
	            Dictionary mapping ticker to validity (True/False)
	        """
	        return {ticker: self._is_plausible_ticker(ticker) for ticker in tickers}


	# Example usage: run this module directly to analyze one sample question.
	if __name__ == "__main__":
	    # Requires the OPENAI_API_KEY environment variable to be set.
	    try:
	        service = LLMService()

	        # Sample market question used for the demonstration run.
	        sample_question = "Will the Supreme Court rule in favor of Trump's tariffs?"

	        print(f"Analyzing: {sample_question}\n")

	        assets = service.get_correlated_assets(sample_question)

	        # (heading to print, key of the list in the returned dictionary)
	        sections = (
	            ("=== Correlated Assets ===", 'correlated'),
	            ("\n=== Inversely Correlated Assets ===", 'inversely_correlated'),
	        )
	        for heading, list_key in sections:
	            print(heading)
	            for entry in assets[list_key]:
	                print(f" {entry['ticker']}: {entry['reason']}")

	    except Exception as exc:
	        # Any failure is reported, followed by the API-key reminder.
	        print(f"Error: {str(exc)}")
	        print("\nMake sure OPENAI_API_KEY environment variable is set.")