# polycorr-backend / llm_service.py
# dhruv575
# Dependency Issues
# 35ead53
"""
LLM Service for Asset Correlation Analysis
Uses OpenAI API to identify correlated and inversely correlated assets
"""
import json
import os
import re
from typing import Dict, List, Optional

from openai import OpenAI
class LLMService:
    """Service to interact with LLM for asset correlation analysis.

    Sends a prediction-market question to an OpenAI chat model, parses the
    JSON answer, and returns only the assets whose ticker has a plausible
    format.
    """

    # 1-5 uppercase letters with an optional dotted suffix (e.g. BRK.B).
    # Used with fullmatch() by both the response filter and validate_tickers()
    # so the two checks can never disagree.
    _TICKER_PATTERN = re.compile(r'[A-Z]{1,5}(\.[A-Z]{1,3})?')

    # Sections expected in the LLM response, in validation/return order.
    _ASSET_KEYS = ("correlated", "inversely_correlated")

    # Allowed number of assets per section as returned by the LLM; the lower
    # bound is also the minimum that must survive ticker filtering.
    _MIN_ASSETS = 2
    _MAX_ASSETS = 5

    # Prompt text is sent to the model verbatim, so its lines are intentionally
    # kept flush-left inside the literal.
    _SYSTEM_PROMPT = """You are a financial analyst expert. Given a prediction market question,
identify **real, publicly traded** assets (stocks or ETFs) that would likely be correlated or inversely
correlated with the outcome of that question.
You MUST follow all of these rules strictly:
- Only use ticker symbols for **publicly traded stocks or ETFs**
- Tickers must be 1–5 UPPERCASE letters (optionally with a single dot and suffix, like BRK.B)
- Do NOT invent tickers or use names of companies, organizations, or concepts as tickers (e.g. OPENAI, BITCOIN, TESLA INC)
- Do NOT use crypto symbols, indices (like SPX), or OTC/pink sheet symbols
- Prefer highly liquid US-listed stocks and ETFs that are available on Yahoo Finance
Return ONLY valid JSON in exactly this format:
{
"correlated": [
{"ticker": "TICKER_SYMBOL", "reason": "One sentence explanation"},
{"ticker": "TICKER_SYMBOL", "reason": "One sentence explanation"}
],
"inversely_correlated": [
{"ticker": "TICKER_SYMBOL", "reason": "One sentence explanation"},
{"ticker": "TICKER_SYMBOL", "reason": "One sentence explanation"}
]
}
Requirements:
- Provide 2-5 correlated assets
- Provide 2-5 inversely correlated assets
- Use valid ticker symbols (stocks or ETFs) that can be found on Yahoo Finance
- Each ticker MUST obey the 1–5 uppercase letter rule (with optional single dot suffix)
- Keep reasons to one clear sentence
- Only return valid JSON, no additional text"""

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize LLM service

        Args:
            api_key: OpenAI API key (if not provided, will use OPENAI_API_KEY env var)

        Raises:
            ValueError: If no API key is available, or if the OpenAI client
                cannot be constructed.
        """
        self.api_key = api_key or os.getenv('OPENAI_API_KEY')
        if not self.api_key:
            raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")

        try:
            # Preferred configuration: explicit timeout and bounded retries.
            self.client = OpenAI(
                api_key=self.api_key,
                timeout=60.0,
                max_retries=2,
            )
        except (TypeError, AttributeError):
            # The installed client rejected the extra keyword arguments
            # (presumably a library version mismatch); retry with the bare
            # minimum before giving up.
            try:
                self.client = OpenAI(api_key=self.api_key)
            except Exception as e2:
                raise ValueError(
                    f"Failed to initialize OpenAI client. "
                    f"This may be due to a version conflict. Error: {str(e2)}"
                ) from e2

        # Chat model used for every completion request.
        self.model = "gpt-4o-mini"

    def get_correlated_assets(self, market_question: str) -> Dict:
        """
        Get correlated and inversely correlated assets for a market question

        Args:
            market_question: The Polymarket question to analyze

        Returns:
            Dictionary containing:
                {
                    "correlated": [
                        {"ticker": "AAPL", "reason": "..."},
                        ...
                    ],
                    "inversely_correlated": [
                        {"ticker": "GLD", "reason": "..."},
                        ...
                    ]
                }
            Tickers are stripped and upper-cased; assets whose ticker does not
            look like a stock/ETF symbol are dropped.

        Raises:
            Exception: For any failure (API error, empty or unparseable
                response, invalid structure, too few valid tickers). The
                message is prefixed with "Error calling OpenAI API: " and the
                original error is available as ``__cause__``.
        """
        user_prompt = (
            f'Market Question: "{market_question}"\n'
            "Identify correlated and inversely correlated assets for this prediction market."
        )

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0.7,
                max_tokens=2000,
            )

            # The message content may be None, so guard before calling
            # str methods on it.
            content = response.choices[0].message.content
            if not content or not content.strip():
                raise ValueError("LLM returned an empty response")

            parsed = self._parse_json_content(content.strip())
            return self._validate_assets(parsed)
        except Exception as e:
            # Keep the historical exception type and message prefix that
            # callers see, but chain the cause so the real error is not lost.
            raise Exception(f"Error calling OpenAI API: {str(e)}") from e

    @staticmethod
    def _parse_json_content(content: str):
        """Decode the LLM reply as JSON, tolerating surrounding text or code fences."""
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            # Fall back to the outermost {...} span when there's extra text.
            json_match = re.search(r'\{.*\}', content, re.DOTALL)
            if json_match is None:
                raise ValueError("Could not parse JSON from LLM response")
            try:
                return json.loads(json_match.group())
            except json.JSONDecodeError as e:
                raise ValueError("Could not parse JSON from LLM response") from e

    @classmethod
    def _validate_assets(cls, result) -> Dict[str, List[Dict]]:
        """Check the decoded response's shape and keep only plausibly formatted tickers."""
        if not isinstance(result, dict) or not all(key in result for key in cls._ASSET_KEYS):
            raise ValueError("Invalid response structure from LLM")

        # Each section must be a list holding the requested number of assets.
        for key in cls._ASSET_KEYS:
            assets = result[key]
            if not isinstance(assets, list):
                raise ValueError("Invalid response structure from LLM")
            if not (cls._MIN_ASSETS <= len(assets) <= cls._MAX_ASSETS):
                label = key.replace("_", " ")
                raise ValueError(
                    f"Expected {cls._MIN_ASSETS}-{cls._MAX_ASSETS} {label} assets, got {len(assets)}"
                )

        filtered = {key: [] for key in cls._ASSET_KEYS}
        invalid_tickers = []
        for key in cls._ASSET_KEYS:
            for asset in result[key]:
                if not isinstance(asset, dict) or 'ticker' not in asset or 'reason' not in asset:
                    raise ValueError("Each asset must have 'ticker' and 'reason' fields")

                raw_ticker = asset["ticker"]
                if isinstance(raw_ticker, str):
                    ticker = raw_ticker.strip().upper()
                    if cls._TICKER_PATTERN.fullmatch(ticker):
                        filtered[key].append({**asset, "ticker": ticker})
                        continue
                else:
                    # Stringify so the error report below stays sortable.
                    ticker = str(raw_ticker)
                invalid_tickers.append(ticker)

        # Ensure we still have enough assets after filtering
        if any(len(filtered[key]) < cls._MIN_ASSETS for key in cls._ASSET_KEYS):
            raise ValueError(
                f"LLM returned insufficient valid tickers after validation. "
                f"Invalid tickers: {sorted(set(invalid_tickers))}"
            )
        return filtered

    def validate_tickers(self, tickers: List[str]) -> Dict[str, bool]:
        """
        Validate that ticker symbols are reasonable

        A ticker is considered valid when it is a string of 1-5 uppercase
        letters, optionally followed by a dot and a 1-3 letter suffix
        (e.g. BRK.B). No normalization is applied: lowercase input or
        surrounding whitespace makes a ticker invalid.

        Args:
            tickers: List of ticker symbols

        Returns:
            Dictionary mapping ticker to validity (True/False)
        """
        return {
            ticker: isinstance(ticker, str) and self._TICKER_PATTERN.fullmatch(ticker) is not None
            for ticker in tickers
        }
# Example usage
if __name__ == "__main__":
    # Manual smoke test: needs the OPENAI_API_KEY environment variable and
    # performs a real API request.
    try:
        service = LLMService()

        # Sample market question to analyze
        question = "Will the Supreme Court rule in favor of Trump's tariffs?"
        print(f"Analyzing: {question}\n")
        assets_by_kind = service.get_correlated_assets(question)

        # (heading, result key) pairs, printed in this order
        sections = (
            ("=== Correlated Assets ===", "correlated"),
            ("\n=== Inversely Correlated Assets ===", "inversely_correlated"),
        )
        for heading, kind in sections:
            print(heading)
            for entry in assets_by_kind[kind]:
                print(f" {entry['ticker']}: {entry['reason']}")
    except Exception as exc:
        print(f"Error: {exc}")
        print("\nMake sure OPENAI_API_KEY environment variable is set.")