Spaces:
Sleeping
Sleeping
pranav8tripathi@gmail.com committed on
Commit ·
1a734a9
1
Parent(s): 27359ef
updated request body
Browse files- app/__pycache__/main.cpython-313.pyc +0 -0
- app/data_sources/__pycache__/wikidata.cpython-313.pyc +0 -0
- app/data_sources/wikidata.py +65 -0
- app/main.py +278 -129
app/__pycache__/main.cpython-313.pyc
CHANGED
|
Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ
|
|
|
app/data_sources/__pycache__/wikidata.cpython-313.pyc
CHANGED
|
Binary files a/app/data_sources/__pycache__/wikidata.cpython-313.pyc and b/app/data_sources/__pycache__/wikidata.cpython-313.pyc differ
|
|
|
app/data_sources/wikidata.py
CHANGED
|
@@ -143,6 +143,71 @@ class WikidataClient:
|
|
| 143 |
|
| 144 |
return result.get("entities", {}).get(entity_id, {})
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
async def get_company_info(self, company_name: str, language: str = "en") -> Dict[str, Any]:
|
| 147 |
"""Get structured information about a company.
|
| 148 |
|
|
|
|
| 143 |
|
| 144 |
return result.get("entities", {}).get(entity_id, {})
|
| 145 |
|
| 146 |
+
async def get_related_entities(
    self,
    entity_id: str,
    relation_type: str = "competitor",
    language: str = "en",
    limit: int = 5
) -> List[Dict[str, Any]]:
    """Get entities related to the specified entity by a specific relation type.

    The entity's claims are fetched once, the IDs of the item-valued claims
    for the requested relation are collected, and the details of each
    related entity are then fetched one by one.

    Args:
        entity_id: The Wikidata entity ID (e.g., "Q478214")
        relation_type: Type of relation to look for (default: "competitor").
            Unknown relation types fall back to the competitor property.
        language: Language code for labels (default: "en")
        limit: Maximum number of related entities to return (default: 5).
            A value of zero or less yields an empty list.

    Returns:
        List of related entities, each in the form returned by
        ``get_entity_details``. Related entities whose lookup fails or
        reports an error are skipped, so the list may be shorter than
        ``limit``.
    """
    # Map relation types to Wikidata property IDs
    relation_properties = {
        "competitor": "P1592",  # competitor
        "parent_company": "P749",  # parent organization
        "subsidiary": "P355",  # subsidiary
        "industry": "P452",  # industry
        "product": "P1056"  # product or material produced
    }

    property_id = relation_properties.get(relation_type, "P1592")  # Default to competitor

    # A non-positive limit can never produce results; returning early also
    # avoids the surprising "all but the last n" behaviour of a negative slice.
    if limit <= 0:
        return []

    # First, get all claims for the entity
    entity = await self.get_entity_details(entity_id, language=language, properties=["claims"])
    if not entity:
        return []
    if "error" in entity:
        self.logger.error("Failed to get entity details: %s", entity["error"])
        return []

    # Find claims with the specified property
    claims = entity.get("claims", {}).get(property_id, [])

    # Extract related entity IDs. The limit is applied to *usable* IDs, not to
    # raw claims, so claims without an item value do not eat into the budget.
    related_ids: List[str] = []
    seen_ids = set()
    for claim in claims:
        mainsnak = claim.get("mainsnak", {})
        if mainsnak.get("datatype") != "wikibase-item":
            continue
        value = mainsnak.get("datavalue", {}).get("value")
        if not isinstance(value, dict):
            continue
        rel_id = value.get("id")
        # Skip empty IDs and targets already collected from an earlier claim.
        if not rel_id or rel_id in seen_ids:
            continue
        seen_ids.add(rel_id)
        related_ids.append(rel_id)
        if len(related_ids) >= limit:
            break

    if not related_ids:
        return []

    # Get details for each related entity
    related_entities = []
    for rel_id in related_ids:
        try:
            details = await self.get_entity_details(
                rel_id,
                language=language,
                properties=["labels", "descriptions", "claims"]
            )
        except Exception as e:
            # Best effort: one failing lookup must not discard the others.
            self.logger.warning("Failed to get details for related entity %s: %s", rel_id, e)
            continue
        if details and "error" not in details:
            related_entities.append(details)

    return related_entities
|
| 210 |
+
|
| 211 |
async def get_company_info(self, company_name: str, language: str = "en") -> Dict[str, Any]:
|
| 212 |
"""Get structured information about a company.
|
| 213 |
|
app/main.py
CHANGED
|
@@ -45,9 +45,58 @@ app.add_middleware(
|
|
| 45 |
allow_headers=["*"],
|
| 46 |
)
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
# Helper functions
|
| 49 |
-
async def build_system_prompt(
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
categories = ", ".join(insight_selection)
|
| 52 |
deep_dive_text = f"\nFor deeper analysis, focus on: {', '.join(deep_dive)}." if deep_dive else ""
|
| 53 |
|
|
@@ -77,27 +126,55 @@ async def build_system_prompt(company_name: str, insight_selection: List[str], d
|
|
| 77 |
)
|
| 78 |
|
| 79 |
|
| 80 |
-
async def generate_insights(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
"""Generate insights for a single company using the LLM.
|
| 82 |
|
| 83 |
Args:
|
| 84 |
company: The company data to analyze
|
| 85 |
categories: List of categories to focus the analysis on
|
| 86 |
business_name: Name of the business being analyzed (for context in the prompt)
|
|
|
|
| 87 |
"""
|
| 88 |
try:
|
| 89 |
-
system_prompt = await build_system_prompt(company.name, categories)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Prepare user prompt with company data
|
| 92 |
user_prompt = (
|
| 93 |
-
f"
|
| 94 |
-
f"
|
| 95 |
-
f"
|
| 96 |
-
f"
|
| 97 |
-
f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
f"• {chr(10)+'• '.join(categories)}\n\n"
|
| 99 |
-
f"
|
| 100 |
-
f"Compare with {business_name} where relevant
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
)
|
| 102 |
|
| 103 |
logger.info(f"Generating insights for {company.name}...")
|
|
@@ -174,19 +251,51 @@ async def get_company_details(request: CompanyDetailsRequest):
|
|
| 174 |
detail=f"Failed to fetch company details: {str(e)}"
|
| 175 |
)
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
@app.post("/api/v1/analyze", response_model=ReportResponse)
|
| 178 |
async def analyze_competitors(
|
| 179 |
-
payload:
|
| 180 |
background_tasks: BackgroundTasks
|
| 181 |
):
|
| 182 |
"""
|
| 183 |
-
Main endpoint for
|
| 184 |
|
| 185 |
This endpoint:
|
| 186 |
-
1.
|
| 187 |
-
2.
|
| 188 |
-
3. Fetches detailed information
|
| 189 |
-
4. Generates insights using the LLM
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
"""
|
| 191 |
# Initialize Wikidata client
|
| 192 |
wikidata = WikidataClient()
|
|
@@ -194,10 +303,32 @@ async def analyze_competitors(
|
|
| 194 |
# Generate a unique request ID
|
| 195 |
request_id = str(uuid.uuid4())
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
# Get company info
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
try:
|
|
|
|
| 201 |
# Step 1: Search for the company on Wikidata
|
| 202 |
company_search = await wikidata.search_entity(company_name, limit=1)
|
| 203 |
if not company_search:
|
|
@@ -208,146 +339,164 @@ async def analyze_competitors(
|
|
| 208 |
|
| 209 |
company_id = company_search[0]['id']
|
| 210 |
|
| 211 |
-
# Step 2: Get company details
|
| 212 |
company_details = await wikidata.get_entity_details(company_id)
|
| 213 |
|
| 214 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
competitors = []
|
| 216 |
-
if
|
| 217 |
-
# Use manually provided competitors
|
| 218 |
-
for comp_name in payload.competitor_choice.competitors:
|
| 219 |
-
comp_search = await wikidata.search_entity(comp_name, limit=1)
|
| 220 |
-
if comp_search:
|
| 221 |
-
competitors.append(comp_search[0])
|
| 222 |
-
else:
|
| 223 |
-
# Auto-discover competitors
|
| 224 |
competitors = await wikidata.get_related_entities(
|
| 225 |
company_id,
|
| 226 |
relation_type='competitor', # P1592
|
| 227 |
limit=min(5, getattr(settings, 'MAX_COMPETITORS', 5))
|
| 228 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
competitor_details = []
|
| 238 |
-
for comp in competitors:
|
| 239 |
-
try:
|
| 240 |
-
details = await wikidata.get_entity_details(comp['id'])
|
| 241 |
-
competitor_details.append(details)
|
| 242 |
-
except Exception as e:
|
| 243 |
-
logger.warning(f"Failed to get details for competitor {comp.get('id')}: {str(e)}")
|
| 244 |
|
| 245 |
-
# Step 5: Generate insights
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
-
#
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
business_name=company_name
|
| 263 |
-
)
|
| 264 |
-
tasks.append(task)
|
| 265 |
|
| 266 |
-
#
|
| 267 |
-
|
| 268 |
|
| 269 |
-
#
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
valid_insights.append(insight)
|
| 276 |
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
detail="Failed to generate insights for any competitors"
|
| 281 |
-
)
|
| 282 |
-
|
| 283 |
-
# Combine all insights into a single, well-formatted summary
|
| 284 |
-
combined_summary = f"📊 **Competitive Analysis Report**\n\n"
|
| 285 |
|
| 286 |
-
# Add
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
-
# Add
|
| 292 |
-
combined_summary += "##
|
| 293 |
-
combined_summary += f"**Name**: {company_details.get('labels', {}).get('en', {}).get('value', company_name)}\n"
|
| 294 |
-
if 'descriptions' in company_details and 'en' in company_details['descriptions']:
|
| 295 |
-
combined_summary += f"**Description**: {company_details['descriptions']['en']['value']}\n"
|
| 296 |
-
if 'inception' in company_details:
|
| 297 |
-
combined_summary += f"**Founded**: {company_details['inception']}\n"
|
| 298 |
-
combined_summary += "\n"
|
| 299 |
|
| 300 |
-
#
|
| 301 |
-
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
-
# Add
|
| 305 |
-
combined_summary += "##
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
# Extract the first 3 bullet points from the summary
|
| 310 |
-
points = [line.strip() for line in insight.summary.split('•') if line.strip()][:3]
|
| 311 |
-
for point in points:
|
| 312 |
-
combined_summary += f"• {point.strip()}\n"
|
| 313 |
-
combined_summary += "\n"
|
| 314 |
|
| 315 |
-
# Add
|
| 316 |
-
combined_summary += "
|
| 317 |
-
for i, insight in enumerate(valid_insights[:3]): # Top 3 recommendations
|
| 318 |
-
if insight.category_breakdown:
|
| 319 |
-
category = next(iter(insight.category_breakdown.keys()), 'their operations')
|
| 320 |
-
combined_summary += f"{i+1}. Consider {insight.company.name}'s approach to {category}\n"
|
| 321 |
-
combined_summary += "\n"
|
| 322 |
|
| 323 |
-
#
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
|
|
|
| 328 |
|
| 329 |
-
# Prepare the final response
|
| 330 |
report = ReportResponse(
|
| 331 |
request_id=request_id,
|
| 332 |
company_name=company_name,
|
| 333 |
generated_at=datetime.utcnow().isoformat(),
|
| 334 |
summary=combined_summary,
|
| 335 |
-
metrics=
|
| 336 |
-
insights=[], #
|
| 337 |
-
recommendations=[], #
|
| 338 |
sources=[], # Sources are included in the summary
|
| 339 |
-
pdf_url=
|
| 340 |
)
|
| 341 |
|
| 342 |
-
# Handle export if requested
|
| 343 |
-
if payload.preferences and payload.preferences.export_format:
|
| 344 |
-
export_format = payload.preferences.export_format.lower()
|
| 345 |
-
logger.info(f"Export requested in format: {export_format}")
|
| 346 |
-
|
| 347 |
-
if export_format == 'pdf':
|
| 348 |
-
# In a real implementation, generate PDF here
|
| 349 |
-
report.pdf_url = f"/api/v1/exports/{request_id}.pdf"
|
| 350 |
-
|
| 351 |
return report
|
| 352 |
|
| 353 |
except HTTPException:
|
|
|
|
| 45 |
allow_headers=["*"],
|
| 46 |
)
|
| 47 |
|
| 48 |
+
# Constants for research modes.
# Keys are the accepted research mode names. The fixed modes ("quick",
# "standard", "deep") carry a flat "max_tokens" budget, while "custom" has no
# "max_tokens" and instead carries "base_tokens" and "per_insight_tokens",
# which build_system_prompt combines with the number of selected insights.
# "time_estimate" and "description" are human-readable strings.
RESEARCH_MODES = {
    "quick": {
        "max_tokens": 1000,
        "time_estimate": "2-3 minutes",
        "description": "Quick snapshot for rapid insights"
    },
    "standard": {
        "max_tokens": 2500,
        "time_estimate": "5-7 minutes",
        "description": "Balanced overview with key insights"
    },
    "deep": {
        "max_tokens": 5000,
        "time_estimate": "12-15 minutes",
        "description": "Comprehensive research with detailed analysis"
    },
    # Budget for this mode is computed per request, not fixed.
    "custom": {
        "base_tokens": 1000,
        "per_insight_tokens": 200,
        "time_estimate": "Variable",
        "description": "Custom research based on selected areas"
    }
}
|
| 72 |
+
|
| 73 |
# Helper functions
|
| 74 |
+
async def build_system_prompt(
|
| 75 |
+
company_name: str,
|
| 76 |
+
insight_selection: List[str],
|
| 77 |
+
research_mode: str = "standard",
|
| 78 |
+
deep_dive: Optional[List[str]] = None
|
| 79 |
+
) -> str:
|
| 80 |
+
"""Build a robust system prompt for the LLM to generate a single formatted summary.
|
| 81 |
+
|
| 82 |
+
Args:
|
| 83 |
+
company_name: Name of the company to analyze
|
| 84 |
+
insight_selection: List of insight categories to include
|
| 85 |
+
research_mode: One of 'quick', 'standard', 'deep', or 'custom'
|
| 86 |
+
deep_dive: Optional list of categories for deeper analysis
|
| 87 |
+
"""
|
| 88 |
+
# Validate research mode
|
| 89 |
+
if research_mode not in RESEARCH_MODES:
|
| 90 |
+
research_mode = "standard"
|
| 91 |
+
|
| 92 |
+
mode_info = RESEARCH_MODES[research_mode]
|
| 93 |
+
|
| 94 |
+
# Calculate token budget for custom mode
|
| 95 |
+
if research_mode == "custom":
|
| 96 |
+
max_tokens = mode_info["base_tokens"] + (len(insight_selection) * mode_info["per_insight_tokens"])
|
| 97 |
+
else:
|
| 98 |
+
max_tokens = mode_info["max_tokens"]
|
| 99 |
+
|
| 100 |
categories = ", ".join(insight_selection)
|
| 101 |
deep_dive_text = f"\nFor deeper analysis, focus on: {', '.join(deep_dive)}." if deep_dive else ""
|
| 102 |
|
|
|
|
| 126 |
)
|
| 127 |
|
| 128 |
|
| 129 |
+
async def generate_insights(
|
| 130 |
+
company: CompanyData,
|
| 131 |
+
categories: list,
|
| 132 |
+
business_name: str = "your business",
|
| 133 |
+
research_mode: str = "standard"
|
| 134 |
+
) -> CompetitorInsight:
|
| 135 |
"""Generate insights for a single company using the LLM.
|
| 136 |
|
| 137 |
Args:
|
| 138 |
company: The company data to analyze
|
| 139 |
categories: List of categories to focus the analysis on
|
| 140 |
business_name: Name of the business being analyzed (for context in the prompt)
|
| 141 |
+
research_mode: One of 'quick', 'standard', 'deep', or 'custom'
|
| 142 |
"""
|
| 143 |
try:
|
| 144 |
+
system_prompt = await build_system_prompt(company.name, categories, research_mode)
|
| 145 |
+
|
| 146 |
+
# Get research mode details
|
| 147 |
+
mode_info = RESEARCH_MODES.get(research_mode, RESEARCH_MODES["standard"])
|
| 148 |
+
|
| 149 |
+
# Prepare context based on research mode
|
| 150 |
+
analysis_scope = {
|
| 151 |
+
"quick": "Provide a concise analysis focusing on key highlights and immediate insights.",
|
| 152 |
+
"standard": "Provide a balanced analysis with key insights and recommendations.",
|
| 153 |
+
"deep": "Provide a comprehensive, detailed analysis with in-depth insights and strategic recommendations.",
|
| 154 |
+
"custom": f"Provide analysis focusing on the selected categories: {', '.join(categories)}."
|
| 155 |
+
}.get(research_mode, "Provide a balanced analysis.")
|
| 156 |
|
| 157 |
# Prepare user prompt with company data
|
| 158 |
user_prompt = (
|
| 159 |
+
f"# Company Analysis: {company.name}\n\n"
|
| 160 |
+
f"## Research Mode: {research_mode.capitalize()}\n"
|
| 161 |
+
f"{analysis_scope}\n\n"
|
| 162 |
+
f"## Company Overview\n"
|
| 163 |
+
f"**Description**: {company.description or 'No description available'}\n"
|
| 164 |
+
f"**Industry**: {getattr(company, 'industry', 'Not specified')}\n"
|
| 165 |
+
f"**Location**: {getattr(company, 'location', 'Not specified')}\n"
|
| 166 |
+
f"**CEO**: {getattr(company, 'ceo', 'Not specified')}\n"
|
| 167 |
+
f"**Founded**: {getattr(company, 'founded', 'Not available')}\n\n"
|
| 168 |
+
f"## Analysis Categories\n"
|
| 169 |
+
f"Please analyze this company's position in the following areas:\n"
|
| 170 |
f"• {chr(10)+'• '.join(categories)}\n\n"
|
| 171 |
+
f"## Additional Context\n"
|
| 172 |
+
f"- Compare with {business_name} where relevant\n"
|
| 173 |
+
f"- Focus on specific, data-driven insights\n"
|
| 174 |
+
f"- Include actionable recommendations\n"
|
| 175 |
+
f"- Be concise yet comprehensive\n\n"
|
| 176 |
+
f"## Output Format\n"
|
| 177 |
+
f"Please structure your response with clear headings for each category."
|
| 178 |
)
|
| 179 |
|
| 180 |
logger.info(f"Generating insights for {company.name}...")
|
|
|
|
| 251 |
detail=f"Failed to fetch company details: {str(e)}"
|
| 252 |
)
|
| 253 |
|
| 254 |
+
class CompanyAnalysisRequest(BaseModel):
    """Request model for company analysis."""
    # Free-form company attributes supplied by the caller. The analyze
    # endpoint requires a non-empty 'name' key and also reads the optional
    # 'description', 'industry', 'founded', 'location' and 'ceo' keys.
    company_info: Dict[str, Any]
    # Research depth; the analyze endpoint rejects values that are not keys
    # of RESEARCH_MODES with a 400 response.
    research_mode: str = "standard"
    # Insight categories to analyze; the analyze endpoint rejects an empty
    # list, and the 'competitors' entry triggers the competitor lookup.
    insight_selection: List[str]

    class Config:
        # Example payload attached to the model's schema.
        # NOTE(review): `schema_extra` is the Pydantic v1 name (v2 uses
        # `json_schema_extra`) - confirm against the pinned Pydantic version.
        schema_extra = {
            "example": {
                "company_info": {
                    "name": "Tesla, Inc.",
                    "location": "Austin, Texas, USA",
                    "industry": "Automotive & Energy",
                    "ceo": "Elon Musk",
                    "founded": 2003
                },
                "research_mode": "standard",
                "insight_selection": [
                    "company_profile",
                    "financials",
                    "market_position",
                    "competitors"
                ]
            }
        }
|
| 279 |
+
|
| 280 |
@app.post("/api/v1/analyze", response_model=ReportResponse)
|
| 281 |
async def analyze_competitors(
|
| 282 |
+
payload: CompanyAnalysisRequest,
|
| 283 |
background_tasks: BackgroundTasks
|
| 284 |
):
|
| 285 |
"""
|
| 286 |
+
Main endpoint for company analysis with configurable research depth.
|
| 287 |
|
| 288 |
This endpoint:
|
| 289 |
+
1. Validates the research mode and insight selection
|
| 290 |
+
2. Searches for the company on Wikidata
|
| 291 |
+
3. Fetches detailed information based on selected insights
|
| 292 |
+
4. Generates insights using the LLM with appropriate detail level
|
| 293 |
+
|
| 294 |
+
Research Modes:
|
| 295 |
+
- quick: Brief overview (2-3 minutes)
|
| 296 |
+
- standard: Balanced analysis (5-7 minutes)
|
| 297 |
+
- deep: Comprehensive research (12-15 minutes)
|
| 298 |
+
- custom: Tailored to selected insights (variable time)
|
| 299 |
"""
|
| 300 |
# Initialize Wikidata client
|
| 301 |
wikidata = WikidataClient()
|
|
|
|
| 303 |
# Generate a unique request ID
|
| 304 |
request_id = str(uuid.uuid4())
|
| 305 |
|
| 306 |
+
# Validate research mode
|
| 307 |
+
if payload.research_mode not in RESEARCH_MODES:
|
| 308 |
+
raise HTTPException(
|
| 309 |
+
status_code=400,
|
| 310 |
+
detail=f"Invalid research mode. Must be one of: {', '.join(RESEARCH_MODES.keys())}"
|
| 311 |
+
)
|
| 312 |
+
|
| 313 |
+
# Validate insight selection
|
| 314 |
+
if not payload.insight_selection:
|
| 315 |
+
raise HTTPException(
|
| 316 |
+
status_code=400,
|
| 317 |
+
detail="At least one insight category must be selected"
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
# Get company info
|
| 321 |
+
company_info = payload.company_info
|
| 322 |
+
company_name = company_info.get('name')
|
| 323 |
+
|
| 324 |
+
if not company_name:
|
| 325 |
+
raise HTTPException(
|
| 326 |
+
status_code=400,
|
| 327 |
+
detail="Company name is required in company_info"
|
| 328 |
+
)
|
| 329 |
|
| 330 |
try:
|
| 331 |
+
# Step 1: Search for the company on Wikidata
|
| 332 |
# Step 1: Search for the company on Wikidata
|
| 333 |
company_search = await wikidata.search_entity(company_name, limit=1)
|
| 334 |
if not company_search:
|
|
|
|
| 339 |
|
| 340 |
company_id = company_search[0]['id']
|
| 341 |
|
| 342 |
+
# Step 2: Get company details with all available properties
|
| 343 |
company_details = await wikidata.get_entity_details(company_id)
|
| 344 |
|
| 345 |
+
# Prepare company data for analysis
|
| 346 |
+
company_data = {
|
| 347 |
+
'name': str(company_info.get('name', '')),
|
| 348 |
+
'description': str(company_info.get('description') or
|
| 349 |
+
company_details.get('descriptions', {}).get('en', {}).get('value', '')),
|
| 350 |
+
'industry': str(company_info.get('industry') or
|
| 351 |
+
', '.join(company_details.get('industry', []))),
|
| 352 |
+
'founded': str(company_info.get('founded') or
|
| 353 |
+
company_details.get('inception', 'Not available')),
|
| 354 |
+
'location': str(company_info.get('location') or
|
| 355 |
+
company_details.get('location', 'Not specified')),
|
| 356 |
+
'ceo': str(company_info.get('ceo') or
|
| 357 |
+
company_details.get('ceo', 'Not specified')),
|
| 358 |
+
'website': str(company_details.get('official_website', '')),
|
| 359 |
+
'metrics': {
|
| 360 |
+
'revenue': company_details.get('revenue'),
|
| 361 |
+
'employees': company_details.get('number_of_employees')
|
| 362 |
+
}
|
| 363 |
+
}
|
| 364 |
+
|
| 365 |
+
# Step 3: Get competitors if in insight selection
|
| 366 |
competitors = []
|
| 367 |
+
if 'competitors' in payload.insight_selection:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
competitors = await wikidata.get_related_entities(
|
| 369 |
company_id,
|
| 370 |
relation_type='competitor', # P1592
|
| 371 |
limit=min(5, getattr(settings, 'MAX_COMPETITORS', 5))
|
| 372 |
)
|
| 373 |
+
|
| 374 |
+
# Add competitor data to company data
|
| 375 |
+
company_data['competitors'] = [
|
| 376 |
+
comp.get('labels', {}).get('en', {}).get('value', 'Unknown')
|
| 377 |
+
for comp in competitors[:5] # Limit to top 5
|
| 378 |
+
]
|
| 379 |
|
| 380 |
+
# Step 4: Generate insights using the LLM
|
| 381 |
+
company_insight = await generate_insights(
|
| 382 |
+
company=CompanyData(**company_data),
|
| 383 |
+
categories=payload.insight_selection,
|
| 384 |
+
business_name=company_name,
|
| 385 |
+
research_mode=payload.research_mode
|
| 386 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
|
| 388 |
+
# Step 5: Generate competitor insights if needed
|
| 389 |
+
competitor_insights = []
|
| 390 |
+
if 'competitors' in payload.insight_selection and competitors:
|
| 391 |
+
competitor_tasks = []
|
| 392 |
+
for comp in competitors[:3]: # Limit to top 3 competitors for analysis
|
| 393 |
+
try:
|
| 394 |
+
comp_details = await wikidata.get_entity_details(comp['id'])
|
| 395 |
+
comp_data = {
|
| 396 |
+
'name': str(comp.get('labels', {}).get('en', {}).get('value', 'Unknown')),
|
| 397 |
+
'description': str(comp_details.get('descriptions', {}).get('en', {}).get('value', '')),
|
| 398 |
+
'industry': str(', '.join(comp_details.get('industry', []))),
|
| 399 |
+
'founded': str(comp_details.get('inception', '')),
|
| 400 |
+
'location': str(comp_details.get('location', 'Unknown')),
|
| 401 |
+
'website': str(comp_details.get('official_website', '')),
|
| 402 |
+
'metrics': {
|
| 403 |
+
'revenue': comp_details.get('revenue'),
|
| 404 |
+
'employees': comp_details.get('number_of_employees')
|
| 405 |
+
}
|
| 406 |
+
}
|
| 407 |
+
|
| 408 |
+
task = generate_insights(
|
| 409 |
+
company=CompanyData(**comp_data),
|
| 410 |
+
categories=payload.insight_selection,
|
| 411 |
+
business_name=company_name,
|
| 412 |
+
research_mode=payload.research_mode
|
| 413 |
+
)
|
| 414 |
+
competitor_tasks.append(task)
|
| 415 |
+
except Exception as e:
|
| 416 |
+
logger.warning(f"Failed to process competitor {comp.get('id')}: {str(e)}")
|
| 417 |
|
| 418 |
+
# Run competitor analysis in parallel
|
| 419 |
+
if competitor_tasks:
|
| 420 |
+
competitor_insights = await asyncio.gather(*competitor_tasks, return_exceptions=True)
|
| 421 |
+
competitor_insights = [ci for ci in competitor_insights if not isinstance(ci, Exception)]
|
|
|
|
|
|
|
|
|
|
| 422 |
|
| 423 |
+
# Prepare the response with company and competitor insights
|
| 424 |
+
combined_summary = f"# {company_name} Analysis Report\n\n"
|
| 425 |
|
| 426 |
+
# Add report metadata
|
| 427 |
+
mode_info = RESEARCH_MODES.get(payload.research_mode, {})
|
| 428 |
+
combined_summary += f"## Report Overview\n"
|
| 429 |
+
combined_summary += f"- **Research Mode**: {payload.research_mode.capitalize()} ({mode_info.get('description', '')})\n"
|
| 430 |
+
combined_summary += f"- **Generated At**: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')}\n"
|
| 431 |
+
combined_summary += f"- **Analysis Scope**: {', '.join(payload.insight_selection)}\n\n"
|
|
|
|
| 432 |
|
| 433 |
+
# Add company insights
|
| 434 |
+
combined_summary += "## Company Analysis\n\n"
|
| 435 |
+
combined_summary += company_insight.summary + "\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
|
| 437 |
+
# Add competitor insights if available
|
| 438 |
+
if competitor_insights:
|
| 439 |
+
combined_summary += "## Competitive Analysis\n\n"
|
| 440 |
+
for i, insight in enumerate(competitor_insights, 1):
|
| 441 |
+
if insight and insight.summary:
|
| 442 |
+
combined_summary += f"### {insight.company.name}\n"
|
| 443 |
+
# Add a brief summary of the competitor
|
| 444 |
+
combined_summary += f"*{insight.company.description or 'No description available'}*\n\n"
|
| 445 |
+
|
| 446 |
+
# Add key insights (first 2-3 bullet points)
|
| 447 |
+
points = [line.strip() for line in insight.summary.split('•') if line.strip()][:3]
|
| 448 |
+
if points:
|
| 449 |
+
combined_summary += "**Key Insights**:\n"
|
| 450 |
+
for point in points:
|
| 451 |
+
combined_summary += f"• {point.strip()}\n"
|
| 452 |
+
combined_summary += "\n"
|
| 453 |
|
| 454 |
+
# Add summary and recommendations
|
| 455 |
+
combined_summary += "## Summary & Recommendations\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 456 |
|
| 457 |
+
# Generate a summary based on research mode
|
| 458 |
+
if payload.research_mode == "quick":
|
| 459 |
+
combined_summary += "### Quick Snapshot\n"
|
| 460 |
+
combined_summary += f"Key highlights and immediate insights about {company_name}.\n\n"
|
| 461 |
+
elif payload.research_mode == "standard":
|
| 462 |
+
combined_summary += "### Standard Analysis\n"
|
| 463 |
+
combined_summary += f"A balanced overview of {company_name}'s position with key insights.\n\n"
|
| 464 |
+
elif payload.research_mode == "deep":
|
| 465 |
+
combined_summary += "### Deep Dive Analysis\n"
|
| 466 |
+
combined_summary += f"Comprehensive research on {company_name} with detailed insights and strategic recommendations.\n\n"
|
| 467 |
+
else: # custom
|
| 468 |
+
combined_summary += f"### Custom Analysis\n"
|
| 469 |
+
combined_summary += f"Analysis focused on: {', '.join(payload.insight_selection)}.\n\n"
|
| 470 |
|
| 471 |
+
# Add data sources section
|
| 472 |
+
combined_summary += "## Data Sources\n\n"
|
| 473 |
+
combined_summary += "This report was generated using data from the following sources:\n"
|
| 474 |
+
combined_summary += "1. **Wikidata** - Free and open knowledge base\n"
|
| 475 |
+
combined_summary += "2. **Public Company Data** - Various public sources\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
|
| 477 |
+
# Add disclaimer
|
| 478 |
+
combined_summary += "*Note: This report is for informational purposes only and should not be considered as financial or investment advice.*\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
|
| 480 |
+
# Check if PDF export is requested
|
| 481 |
+
pdf_url = None
|
| 482 |
+
if hasattr(payload, 'preferences') and payload.preferences and hasattr(payload.preferences, 'export_format') and payload.preferences.export_format == 'pdf':
|
| 483 |
+
pdf_url = f"/api/v1/exports/{request_id}.pdf"
|
| 484 |
+
# In a real implementation, you would generate the PDF here
|
| 485 |
+
# background_tasks.add_task(generate_pdf_export, request_id, combined_summary, [])
|
| 486 |
|
| 487 |
+
# Prepare the final response
|
| 488 |
report = ReportResponse(
|
| 489 |
request_id=request_id,
|
| 490 |
company_name=company_name,
|
| 491 |
generated_at=datetime.utcnow().isoformat(),
|
| 492 |
summary=combined_summary,
|
| 493 |
+
metrics=company_insight.metrics if hasattr(company_insight, 'metrics') else [],
|
| 494 |
+
insights=[], # Individual insights are included in the summary
|
| 495 |
+
recommendations=[], # Recommendations are included in the summary
|
| 496 |
sources=[], # Sources are included in the summary
|
| 497 |
+
pdf_url=pdf_url
|
| 498 |
)
|
| 499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
return report
|
| 501 |
|
| 502 |
except HTTPException:
|