Seth committed on
Commit
6d1e595
·
1 Parent(s): b3f7679
README.md CHANGED
@@ -13,10 +13,12 @@ PostGen is a comprehensive LinkedIn content scheduling application that integrat
13
 
14
  ## Features
15
 
16
- - **AI Content Generation**: Uses GPT to generate engaging LinkedIn posts
 
 
17
  - **Canva Integration**: Access and apply Canva brand templates using the Autofill API
18
  - **LinkedIn Scheduling**: Schedule and publish posts directly to LinkedIn
19
- - **Asset Repository**: Upload and organize marketing materials by product categories
20
  - **Smart Scheduler**: Agentic AI automatically generates content schedules based on date ranges, products, and post types
21
  - **Product Categories**: Support for OCR, P2P, and O2C products with sub-categories
22
 
@@ -52,6 +54,10 @@ Create a `.env` file in the backend directory with the following variables:
52
  OPENAI_API_KEY=your_openai_api_key
53
  OPENAI_MODEL=gpt-4o
54
 
 
 
 
 
55
  # Canva (optional - can be passed via API)
56
  CANVA_ACCESS_TOKEN=your_canva_access_token
57
 
 
13
 
14
  ## Features
15
 
16
+ - **Agentic AI System**: Multi-step AI planning that analyzes assets, extracts insights, and generates context-aware content
17
+ - **Document Parsing**: Automatic OCR analysis of uploaded documents using integrated OCR API
18
+ - **AI Content Generation**: Uses GPT with extracted asset insights to generate engaging, authentic LinkedIn posts
19
  - **Canva Integration**: Access and apply Canva brand templates using the Autofill API
20
  - **LinkedIn Scheduling**: Schedule and publish posts directly to LinkedIn
21
+ - **Asset Repository**: Upload and organize marketing materials by product categories with automatic content extraction
22
  - **Smart Scheduler**: Agentic AI automatically generates content schedules based on date ranges, products, and post types
23
  - **Product Categories**: Support for OCR, P2P, and O2C products with sub-categories
24
 
 
54
  OPENAI_API_KEY=your_openai_api_key
55
  OPENAI_MODEL=gpt-4o
56
 
57
+ # OCR API (for document parsing and asset analysis)
58
+ OCR_API_URL=https://seth0330-ezofisocr.hf.space
59
+ OCR_API_KEY=your_ocr_api_key
60
+
61
  # Canva (optional - can be passed via API)
62
  CANVA_ACCESS_TOKEN=your_canva_access_token
63
 
backend/app/database.py CHANGED
@@ -253,6 +253,9 @@ def init_db():
253
  sub_category VARCHAR,
254
  size INTEGER,
255
  extra_metadata JSONB,
 
 
 
256
  created_at TIMESTAMP DEFAULT NOW()
257
  )""",
258
  """CREATE TABLE IF NOT EXISTS posts (
@@ -288,7 +291,53 @@ def init_db():
288
  for sql in tables_sql:
289
  cursor.execute(sql)
290
  conn.commit()
291
- cursor.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  conn.close()
293
  print("✓ CockroachDB tables created successfully (using direct psycopg2 connection)")
294
  return True
 
253
  sub_category VARCHAR,
254
  size INTEGER,
255
  extra_metadata JSONB,
256
+ extracted_content JSONB,
257
+ analysis_status VARCHAR DEFAULT 'pending',
258
+ analyzed_at TIMESTAMP,
259
  created_at TIMESTAMP DEFAULT NOW()
260
  )""",
261
  """CREATE TABLE IF NOT EXISTS posts (
 
291
  for sql in tables_sql:
292
  cursor.execute(sql)
293
  conn.commit()
294
+
295
+ # Add new columns to assets table if they don't exist (migration)
296
+ try:
297
+ cursor = conn.cursor()
298
+ # Check and add extracted_content column
299
+ cursor.execute("""
300
+ DO $$
301
+ BEGIN
302
+ IF NOT EXISTS (
303
+ SELECT 1 FROM information_schema.columns
304
+ WHERE table_name='assets' AND column_name='extracted_content'
305
+ ) THEN
306
+ ALTER TABLE assets ADD COLUMN extracted_content JSONB;
307
+ END IF;
308
+ END $$;
309
+ """)
310
+ # Check and add analysis_status column
311
+ cursor.execute("""
312
+ DO $$
313
+ BEGIN
314
+ IF NOT EXISTS (
315
+ SELECT 1 FROM information_schema.columns
316
+ WHERE table_name='assets' AND column_name='analysis_status'
317
+ ) THEN
318
+ ALTER TABLE assets ADD COLUMN analysis_status VARCHAR DEFAULT 'pending';
319
+ END IF;
320
+ END $$;
321
+ """)
322
+ # Check and add analyzed_at column
323
+ cursor.execute("""
324
+ DO $$
325
+ BEGIN
326
+ IF NOT EXISTS (
327
+ SELECT 1 FROM information_schema.columns
328
+ WHERE table_name='assets' AND column_name='analyzed_at'
329
+ ) THEN
330
+ ALTER TABLE assets ADD COLUMN analyzed_at TIMESTAMP;
331
+ END IF;
332
+ END $$;
333
+ """)
334
+ conn.commit()
335
+ cursor.close()
336
+ print("✓ Database migration completed (added new asset columns)")
337
+ except Exception as migration_error:
338
+ # Migration is best-effort: the IF NOT EXISTS guards above already handle existing columns, so any error here is only logged and startup continues
339
+ print(f"Migration note: {migration_error}")
340
+
341
  conn.close()
342
  print("✓ CockroachDB tables created successfully (using direct psycopg2 connection)")
343
  return True
backend/app/main.py CHANGED
@@ -16,6 +16,8 @@ from app.schemas import (
16
  from app.services.canva_service import CanvaService
17
  from app.services.linkedin_service import LinkedInService
18
  from app.services.ai_service import AIService
 
 
19
  from app.database import init_db, get_db, get_direct_psycopg2_connection, ensure_default_user
20
  from sqlalchemy.orm import Session
21
 
@@ -54,6 +56,8 @@ async def startup_event():
54
 
55
  # Services
56
  ai_service = AIService()
 
 
57
 
58
  # ---- API Endpoints ----
59
 
@@ -135,16 +139,35 @@ async def get_linkedin_profile(access_token: str):
135
  # ---- AI Content Generation ----
136
 
137
  @app.post("/api/ai/generate-content", response_model=AIContentResponse)
138
- async def generate_ai_content(request: AIContentRequest):
139
- """Generate LinkedIn post content using GPT"""
140
  try:
141
- # Get assets context if provided
142
- assets_context = None
143
  if request.assets:
144
- # In a real implementation, fetch asset descriptions from database
145
- assets_context = f"User has {len(request.assets)} assets available"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
- response = await ai_service.generate_content(request, assets_context)
 
 
 
 
148
  return response
149
  except Exception as e:
150
  raise HTTPException(status_code=500, detail=f"AI generation failed: {str(e)}")
@@ -278,6 +301,98 @@ async def upload_asset(
278
  else:
279
  raise commit_error
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  return {
282
  "id": db_asset.id,
283
  "name": db_asset.name,
@@ -285,6 +400,7 @@ async def upload_asset(
285
  "product_category": db_asset.product_category,
286
  "sub_category": db_asset.sub_category,
287
  "size": db_asset.size,
 
288
  "created_at": db_asset.created_at.isoformat() if hasattr(db_asset, 'created_at') else datetime.utcnow().isoformat()
289
  }
290
  except Exception as db_error:
@@ -332,6 +448,9 @@ async def get_assets(
332
  "product_category": asset.product_category,
333
  "sub_category": asset.sub_category,
334
  "size": asset.size,
 
 
 
335
  "created_at": asset.created_at
336
  })
337
  except Exception as orm_error:
@@ -345,14 +464,16 @@ async def get_assets(
345
  cursor = conn.cursor()
346
  if product_category and product_category != "all":
347
  cursor.execute("""
348
- SELECT id, name, file_path, file_type, product_category, sub_category, size, created_at
 
349
  FROM assets
350
  WHERE product_category = %s
351
  ORDER BY created_at DESC
352
  """, (product_category,))
353
  else:
354
  cursor.execute("""
355
- SELECT id, name, file_path, file_type, product_category, sub_category, size, created_at
 
356
  FROM assets
357
  ORDER BY created_at DESC
358
  """)
@@ -369,7 +490,10 @@ async def get_assets(
369
  "product_category": row[4],
370
  "sub_category": row[5],
371
  "size": row[6],
372
- "created_at": row[7]
 
 
 
373
  })
374
  except Exception as psycopg2_error:
375
  print(f"Direct psycopg2 query failed: {psycopg2_error}")
@@ -549,23 +673,54 @@ async def get_posts():
549
  # ---- Campaign Management ----
550
 
551
  @app.post("/api/campaigns/generate")
552
- async def generate_campaign(campaign_data: dict):
553
  """Generate a campaign schedule using agentic AI"""
554
  try:
555
- # This would use AI to generate a schedule based on:
556
- # - Date range
557
- # - Products to focus on
558
- # - Post types mix
559
- # - Posts per week
560
 
561
- # Mock implementation
562
- return {
563
- "campaign_id": 1,
564
- "generated_posts": 12,
565
- "schedule": []
566
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  except Exception as e:
568
- raise HTTPException(status_code=500, detail=str(e))
 
 
569
 
570
  # ---- Frontend static serving ----
571
  # Path calculation: /app/backend/app/main.py -> /app/frontend/dist
 
16
  from app.services.canva_service import CanvaService
17
  from app.services.linkedin_service import LinkedInService
18
  from app.services.ai_service import AIService
19
+ from app.services.asset_analyzer import AssetAnalyzer
20
+ from app.services.agentic_planner import AgenticPlanner
21
  from app.database import init_db, get_db, get_direct_psycopg2_connection, ensure_default_user
22
  from sqlalchemy.orm import Session
23
 
 
56
 
57
  # Services
58
  ai_service = AIService()
59
+ asset_analyzer = AssetAnalyzer()
60
+ agentic_planner = AgenticPlanner()
61
 
62
  # ---- API Endpoints ----
63
 
 
139
  # ---- AI Content Generation ----
140
 
141
  @app.post("/api/ai/generate-content", response_model=AIContentResponse)
142
async def generate_ai_content(request: AIContentRequest, db: Session = Depends(get_db)):
    """Produce LinkedIn post copy via GPT, enriched with stored asset extractions.

    When the request names asset ids, the matching rows are loaded and their
    id, name, product category and extracted content are handed to the AI
    service as ``asset_insights``. A failed lookup is logged and generation
    proceeds without insights. Any other failure surfaces as an HTTP 500.
    """
    try:
        insights = None
        if request.assets:
            try:
                from app.models import Asset

                # Load only the assets referenced by the request.
                matching = db.query(Asset).filter(Asset.id.in_(request.assets)).all()
                insights = [
                    {
                        "id": record.id,
                        "name": record.name,
                        "product_category": record.product_category,
                        "extracted_content": getattr(record, "extracted_content", None),
                    }
                    for record in matching
                ]
            except Exception as db_error:
                # Best-effort: content can still be generated without asset context.
                print(f"Could not fetch assets from DB: {db_error}")
                insights = None

        return await ai_service.generate_content(
            request,
            assets_context=None,
            asset_insights=insights,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"AI generation failed: {str(e)}")
 
301
  else:
302
  raise commit_error
303
 
304
+ # Analyze asset using OCR API (agentic step)
305
+ asset_id = db_asset.id
306
+ if file_type in ["document", "image"]:
307
+ # Update status to processing
308
+ try:
309
+ conn = get_direct_psycopg2_connection()
310
+ if conn:
311
+ cursor = conn.cursor()
312
+ cursor.execute("""
313
+ UPDATE assets
314
+ SET analysis_status = 'processing'
315
+ WHERE id = %s
316
+ """, (asset_id,))
317
+ conn.commit()
318
+ cursor.close()
319
+ conn.close()
320
+ except Exception as update_error:
321
+ print(f"Could not update analysis status: {update_error}")
322
+
323
+ # Analyze asset inline. NOTE: the OCR call is awaited here, so the upload response is delayed until analysis finishes
324
+ try:
325
+ analysis_result = await asset_analyzer.analyze_document(str(file_path))
326
+ if analysis_result.get("success") and analysis_result.get("extracted_content"):
327
+ # Update asset with extracted content
328
+ try:
329
+ conn = get_direct_psycopg2_connection()
330
+ if conn:
331
+ cursor = conn.cursor()
332
+ import json
333
+ extracted_json = json.dumps(analysis_result["extracted_content"])
334
+ cursor.execute("""
335
+ UPDATE assets
336
+ SET extracted_content = %s::jsonb,
337
+ analysis_status = 'completed',
338
+ analyzed_at = NOW()
339
+ WHERE id = %s
340
+ """, (extracted_json, asset_id))
341
+ conn.commit()
342
+ cursor.close()
343
+ conn.close()
344
+ print(f"✓ Asset {asset_id} analyzed successfully")
345
+ except Exception as update_error:
346
+ print(f"Could not save extracted content: {update_error}")
347
+ # Try to mark as failed
348
+ try:
349
+ conn = get_direct_psycopg2_connection()
350
+ if conn:
351
+ cursor = conn.cursor()
352
+ cursor.execute("""
353
+ UPDATE assets
354
+ SET analysis_status = 'failed'
355
+ WHERE id = %s
356
+ """, (asset_id,))
357
+ conn.commit()
358
+ cursor.close()
359
+ conn.close()
360
+ except:
361
+ pass
362
+ else:
363
+ # Mark as failed if analysis didn't succeed
364
+ try:
365
+ conn = get_direct_psycopg2_connection()
366
+ if conn:
367
+ cursor = conn.cursor()
368
+ cursor.execute("""
369
+ UPDATE assets
370
+ SET analysis_status = 'failed'
371
+ WHERE id = %s
372
+ """, (asset_id,))
373
+ conn.commit()
374
+ cursor.close()
375
+ conn.close()
376
+ except:
377
+ pass
378
+ except Exception as analysis_error:
379
+ print(f"Asset analysis error: {analysis_error}")
380
+ # Mark as failed
381
+ try:
382
+ conn = get_direct_psycopg2_connection()
383
+ if conn:
384
+ cursor = conn.cursor()
385
+ cursor.execute("""
386
+ UPDATE assets
387
+ SET analysis_status = 'failed'
388
+ WHERE id = %s
389
+ """, (asset_id,))
390
+ conn.commit()
391
+ cursor.close()
392
+ conn.close()
393
+ except:
394
+ pass
395
+
396
  return {
397
  "id": db_asset.id,
398
  "name": db_asset.name,
 
400
  "product_category": db_asset.product_category,
401
  "sub_category": db_asset.sub_category,
402
  "size": db_asset.size,
403
+ "analysis_status": "processing" if file_type in ["document", "image"] else "pending",
404
  "created_at": db_asset.created_at.isoformat() if hasattr(db_asset, 'created_at') else datetime.utcnow().isoformat()
405
  }
406
  except Exception as db_error:
 
448
  "product_category": asset.product_category,
449
  "sub_category": asset.sub_category,
450
  "size": asset.size,
451
+ "extracted_content": asset.extracted_content if hasattr(asset, 'extracted_content') else None,
452
+ "analysis_status": asset.analysis_status if hasattr(asset, 'analysis_status') else None,
453
+ "analyzed_at": asset.analyzed_at.isoformat() if hasattr(asset, 'analyzed_at') and asset.analyzed_at else None,
454
  "created_at": asset.created_at
455
  })
456
  except Exception as orm_error:
 
464
  cursor = conn.cursor()
465
  if product_category and product_category != "all":
466
  cursor.execute("""
467
+ SELECT id, name, file_path, file_type, product_category, sub_category, size,
468
+ extracted_content, analysis_status, analyzed_at, created_at
469
  FROM assets
470
  WHERE product_category = %s
471
  ORDER BY created_at DESC
472
  """, (product_category,))
473
  else:
474
  cursor.execute("""
475
+ SELECT id, name, file_path, file_type, product_category, sub_category, size,
476
+ extracted_content, analysis_status, analyzed_at, created_at
477
  FROM assets
478
  ORDER BY created_at DESC
479
  """)
 
490
  "product_category": row[4],
491
  "sub_category": row[5],
492
  "size": row[6],
493
+ "extracted_content": row[7] if len(row) > 7 else None,
494
+ "analysis_status": row[8] if len(row) > 8 else None,
495
+ "analyzed_at": row[9].isoformat() if len(row) > 9 and row[9] else None,
496
+ "created_at": row[10] if len(row) > 10 else row[6]
497
  })
498
  except Exception as psycopg2_error:
499
  print(f"Direct psycopg2 query failed: {psycopg2_error}")
 
673
  # ---- Campaign Management ----
674
 
675
  @app.post("/api/campaigns/generate")
676
async def generate_campaign(campaign_data: dict, db: Session = Depends(get_db)):
    """Generate a campaign schedule using agentic AI.

    ``campaign_data`` must carry ISO-8601 ``date_range_start`` and
    ``date_range_end`` strings (a trailing ``Z`` is accepted). Optional keys
    are ``products``, ``post_types`` and ``posts_per_week`` (default 5).
    Assets whose product category is among ``products`` are loaded and handed
    to the planner; if that lookup fails the campaign is planned without them.

    Raises:
        HTTPException: 400 when the date range or ``posts_per_week`` is
            missing or malformed; 500 for any other failure.
    """
    try:
        from datetime import datetime
        from app.models import Asset

        def _parse_iso(field):
            """Read one ISO-8601 datetime from the payload or raise a 400."""
            raw = campaign_data.get(field)
            if not isinstance(raw, str) or not raw:
                raise HTTPException(
                    status_code=400,
                    detail=f"{field} is required and must be an ISO-8601 string"
                )
            try:
                # fromisoformat() on older Pythons rejects the 'Z' suffix.
                return datetime.fromisoformat(raw.replace("Z", "+00:00"))
            except ValueError:
                raise HTTPException(
                    status_code=400,
                    detail=f"{field} is not a valid ISO-8601 datetime: {raw}"
                )

        # Extract and validate campaign parameters
        date_range_start = _parse_iso("date_range_start")
        date_range_end = _parse_iso("date_range_end")
        # Naive and aware datetimes cannot be compared or subtracted.
        if (date_range_start.tzinfo is None) != (date_range_end.tzinfo is None):
            raise HTTPException(
                status_code=400,
                detail="date_range_start and date_range_end must both include or both omit a timezone"
            )
        if date_range_end < date_range_start:
            raise HTTPException(
                status_code=400,
                detail="date_range_end must not be earlier than date_range_start"
            )

        # An explicit null in the payload is treated like a missing key.
        products = campaign_data.get("products") or []
        post_types = campaign_data.get("post_types") or []

        try:
            posts_per_week = int(campaign_data.get("posts_per_week", 5))
        except (TypeError, ValueError):
            raise HTTPException(status_code=400, detail="posts_per_week must be an integer")
        if posts_per_week < 1:
            raise HTTPException(status_code=400, detail="posts_per_week must be at least 1")

        # Fetch relevant assets for the selected products
        assets = []
        try:
            db_assets = db.query(Asset).filter(Asset.product_category.in_(products)).all()
            for asset in db_assets:
                assets.append({
                    "id": asset.id,
                    "name": asset.name,
                    "file_type": asset.file_type,
                    "product_category": asset.product_category,
                    "sub_category": asset.sub_category,
                    "extracted_content": getattr(asset, "extracted_content", None),
                    "analysis_status": getattr(asset, "analysis_status", None)
                })
        except Exception as asset_error:
            # Best-effort: continue without assets
            print(f"Could not fetch assets: {asset_error}")

        # Use agentic planner to generate campaign
        campaign_plan = await agentic_planner.plan_campaign(
            date_range_start=date_range_start,
            date_range_end=date_range_end,
            products=products,
            post_types=post_types,
            posts_per_week=posts_per_week,
            assets=assets
        )

        return campaign_plan
    except HTTPException:
        # Keep deliberate 4xx responses from being re-wrapped as a 500 below.
        raise
    except Exception as e:
        import traceback
        print(f"Campaign generation error: {traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=f"Campaign generation failed: {str(e)}")
724
 
725
  # ---- Frontend static serving ----
726
  # Path calculation: /app/backend/app/main.py -> /app/frontend/dist
backend/app/models.py CHANGED
@@ -43,6 +43,9 @@ class Asset(Base):
43
  sub_category = Column(String, nullable=True)
44
  size = Column(Integer) # in bytes
45
  extra_metadata = Column(JSON, nullable=True) # Renamed from 'metadata' to avoid SQLAlchemy conflict
 
 
 
46
  created_at = Column(DateTime, default=datetime.utcnow)
47
 
48
  user = relationship("User", back_populates="assets")
 
43
  sub_category = Column(String, nullable=True)
44
  size = Column(Integer) # in bytes
45
  extra_metadata = Column(JSON, nullable=True) # Renamed from 'metadata' to avoid SQLAlchemy conflict
46
+ extracted_content = Column(JSON, nullable=True) # OCR/extracted content from document parsing API
47
+ analysis_status = Column(String, default="pending") # 'pending', 'processing', 'completed', 'failed'
48
+ analyzed_at = Column(DateTime, nullable=True)
49
  created_at = Column(DateTime, default=datetime.utcnow)
50
 
51
  user = relationship("User", back_populates="assets")
backend/app/schemas.py CHANGED
@@ -34,6 +34,9 @@ class AssetResponse(BaseModel):
34
  product_category: str
35
  sub_category: Optional[str] = None
36
  size: int
 
 
 
37
  created_at: datetime
38
 
39
  class Config:
 
34
  product_category: str
35
  sub_category: Optional[str] = None
36
  size: int
37
+ extracted_content: Optional[Dict[str, Any]] = None
38
+ analysis_status: Optional[str] = None
39
+ analyzed_at: Optional[datetime] = None
40
  created_at: datetime
41
 
42
  class Config:
backend/app/services/agentic_planner.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict, Any, Optional
3
+ from datetime import datetime, timedelta
4
+ from openai import OpenAI
5
+ from app.services.asset_analyzer import AssetAnalyzer
6
+
7
class AgenticPlanner:
    """Agentic AI service for planning and generating content campaigns.

    Combines insights extracted from uploaded assets with an OpenAI chat
    completion to produce content themes, then spreads those themes over a
    date range as a posting schedule.
    """

    # Bounds on how much extracted asset text is copied into the prompt.
    _MAX_INSIGHTS_PER_CATEGORY = 3
    _MAX_INSIGHT_CHARS = 300

    def __init__(self):
        # API key and model name are read from the environment.
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
        self.model = os.getenv("OPENAI_MODEL", "gpt-4o")
        self.asset_analyzer = AssetAnalyzer()

    async def plan_campaign(
        self,
        date_range_start: datetime,
        date_range_end: datetime,
        products: List[str],
        post_types: List[str],
        posts_per_week: int,
        assets: Optional[List[Dict[str, Any]]] = None
    ) -> Dict[str, Any]:
        """
        Agentic planning: multi-step process to create a content campaign.

        Steps:
        1. Analyze available assets and extract insights
        2. Derive the number of posts from the date range and weekly cadence
        3. Generate content themes with AI (or a static fallback)
        4. Place the themes on a posting schedule

        Returns:
            Dict with ``campaign_id`` (always None here), ``generated_posts``,
            ``schedule``, ``asset_insights`` and ``content_themes``.
        """
        # Step 1: Analyze assets and extract insights
        asset_insights = await self._analyze_assets(assets or [])

        # Step 2: Calculate campaign parameters (date range is inclusive;
        # ranges shorter than a week still count as one week)
        total_days = (date_range_end - date_range_start).days + 1
        total_weeks = max(1, total_days / 7)
        total_posts = int(posts_per_week * total_weeks)

        # Step 3: Generate content plan using AI
        content_plan = await self._generate_content_plan(
            products=products,
            post_types=post_types,
            total_posts=total_posts,
            date_range_start=date_range_start,
            date_range_end=date_range_end,
            asset_insights=asset_insights
        )

        # Step 4: Create detailed schedule
        schedule = self._create_schedule(
            content_plan=content_plan,
            date_range_start=date_range_start,
            date_range_end=date_range_end,
            posts_per_week=posts_per_week
        )

        return {
            "campaign_id": None,  # Will be set when saved to DB
            "generated_posts": len(schedule),
            "schedule": schedule,
            "asset_insights": asset_insights,
            "content_themes": content_plan.get("themes", [])
        }

    async def _analyze_assets(self, assets: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Group assets by product category and collect their key insights.

        Every asset is counted; only assets that carry ``extracted_content``
        and yield a non-empty insight are listed under ``insights``/``assets``.
        """
        insights_by_category: Dict[Any, Dict[str, Any]] = {}

        for asset in assets:
            category = asset.get("product_category", "ocr")
            bucket = insights_by_category.setdefault(
                category, {"count": 0, "insights": [], "assets": []}
            )
            bucket["count"] += 1

            extracted_content = asset.get("extracted_content")
            if not extracted_content:
                continue

            insight = self.asset_analyzer.extract_key_insights(extracted_content)
            if insight:
                bucket["insights"].append(insight)
                bucket["assets"].append({
                    "id": asset.get("id"),
                    "name": asset.get("name"),
                    "insight": insight
                })

        total_assets = len(assets)
        return {
            "total_assets": total_assets,
            "by_category": insights_by_category,
            "summary": f"Analyzed {total_assets} assets across {len(insights_by_category)} product categories"
        }

    async def _generate_content_plan(
        self,
        products: List[str],
        post_types: List[str],
        total_posts: int,
        date_range_start: datetime,
        date_range_end: datetime,
        asset_insights: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Ask the chat model for a content plan, falling back to static themes.

        Returns a dict with a ``themes`` list. If the API call fails or the
        response is not a non-empty list of theme objects, the failure is
        logged and ``_generate_fallback_themes`` is used instead.
        """
        product_descriptions = {
            "ocr": "Intelligent Document Parsing (OCR) - AI-powered document processing and data extraction",
            "p2p": "Purchase To Pay (P2P) - End-to-end procurement and accounts payable automation",
            "o2c": "Order to Cash (O2C) - Complete order management and accounts receivable workflow"
        }

        post_type_descriptions = {
            "carousel": "Multi-slide carousel post with visual storytelling",
            "cover_content": "Post with cover image and engaging text content",
            "content_only": "Text-only post focused on valuable insights",
            "webinar": "Webinar invitation post to promote an upcoming event"
        }

        # Build asset context
        asset_context = ""
        if asset_insights.get("by_category"):
            asset_context = "\n\nAvailable Asset Insights:\n"
            for category, data in asset_insights["by_category"].items():
                asset_context += f"\n{product_descriptions.get(category, category)}:\n"
                asset_context += f"- {data['count']} assets available\n"
                if data.get("insights"):
                    asset_context += f"- Key insights: {len(data['insights'])} extracted\n"
                    # Pass a bounded sample of the insight text itself so the
                    # model can actually use it, not just the count.
                    for insight in data["insights"][:self._MAX_INSIGHTS_PER_CATEGORY]:
                        asset_context += f"  * {str(insight)[:self._MAX_INSIGHT_CHARS]}\n"

        system_prompt = """You are an expert content strategist for B2B SaaS marketing on LinkedIn.
Your task is to create a comprehensive content plan that:
- Distributes content evenly across the date range
- Varies post types to maintain engagement
- Uses available assets and insights effectively
- Creates diverse, valuable content themes
- Follows LinkedIn best practices

Return a JSON structure with themes and recommended post types for each theme."""

        user_prompt = f"""Create a content plan for a LinkedIn campaign:

Products to focus on: {', '.join([product_descriptions.get(p, p) for p in products])}
Available post types: {', '.join([post_type_descriptions.get(pt, pt) for pt in post_types])}
Total posts needed: {total_posts}
Date range: {date_range_start.strftime('%Y-%m-%d')} to {date_range_end.strftime('%Y-%m-%d')}
{asset_context}

Generate {total_posts} content themes with:
- Theme title
- Brief description
- Recommended post type
- Product category
- Key talking points

Return as JSON with structure:
{{
    "themes": [
        {{
            "title": "Theme title",
            "description": "Brief description",
            "post_type": "carousel|cover_content|content_only|webinar",
            "product_category": "ocr|p2p|o2c",
            "talking_points": ["point1", "point2", "point3"]
        }}
    ]
}}"""

        try:
            import asyncio
            import json

            def _request_plan():
                return self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ],
                    temperature=0.8,
                    max_tokens=2000,
                    response_format={"type": "json_object"}
                )

            # The client call is synchronous; run it in the default executor
            # so the event loop keeps serving other requests meanwhile.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, _request_plan)

            content_plan = json.loads(response.choices[0].message.content)
            themes = content_plan.get("themes") if isinstance(content_plan, dict) else None
            if (
                not isinstance(themes, list)
                or not themes
                or not all(isinstance(theme, dict) for theme in themes)
            ):
                raise ValueError("AI response did not contain a list of theme objects")
            return content_plan
        except Exception as e:
            # Fallback: Generate basic themes, but leave a trace of the cause.
            print(f"AI content plan generation failed, using fallback themes: {e}")
            return self._generate_fallback_themes(products, post_types, total_posts)

    def _generate_fallback_themes(
        self,
        products: List[str],
        post_types: List[str],
        total_posts: int
    ) -> Dict[str, Any]:
        """Generate basic themes if AI fails.

        Themes are dealt round-robin across ``products`` so exactly
        ``total_posts`` themes are produced (no remainder is dropped) and
        products alternate through the schedule. With no products, or a
        non-positive ``total_posts``, the theme list is empty.
        """
        theme_templates = {
            "ocr": [
                "Document Automation Benefits",
                "OCR Technology Overview",
                "Efficiency Gains with Intelligent Parsing"
            ],
            "p2p": [
                "Streamline Procurement Process",
                "Accounts Payable Automation",
                "Purchase Request Workflow"
            ],
            "o2c": [
                "Order Management Best Practices",
                "Sales Order Processing",
                "Accounts Receivable Optimization"
            ]
        }
        default_templates = ["Product Feature"]

        themes = []
        if not products:
            return {"themes": themes}

        for position in range(max(0, total_posts)):
            product = products[position % len(products)]
            # Ordinal of this post within its own product (0-based).
            nth = position // len(products)
            templates = theme_templates.get(product, default_templates)
            themes.append({
                "title": f"{templates[nth % len(templates)]} - Post {nth + 1}",
                "description": f"Content about {product}",
                "post_type": post_types[nth % len(post_types)] if post_types else "content_only",
                "product_category": product,
                "talking_points": ["Key benefit 1", "Key benefit 2", "Use case"]
            })

        return {"themes": themes}

    def _create_schedule(
        self,
        content_plan: Dict[str, Any],
        date_range_start: datetime,
        date_range_end: datetime,
        posts_per_week: int
    ) -> List[Dict[str, Any]]:
        """Create a detailed posting schedule.

        Theme ``i`` is placed ``i * 7 // posts_per_week`` days after the
        start, so each week receives ``posts_per_week`` posts, and never more
        than one post per day. Themes that would land after
        ``date_range_end`` are dropped. Returns an empty list when there are
        no themes or ``posts_per_week`` is not positive.
        """
        themes = content_plan.get("themes", [])
        if not themes or posts_per_week <= 0:
            return []

        schedule = []
        for index, theme in enumerate(themes):
            # Integer arithmetic avoids float rounding; max() caps the
            # cadence at one post per day when posts_per_week exceeds 7.
            day_offset = max(index, (index * 7) // posts_per_week)
            post_date = date_range_start + timedelta(days=day_offset)
            if post_date > date_range_end:
                break

            schedule.append({
                "date": post_date.isoformat(),
                "time": "10:00",  # Default time, can be optimized
                "theme": theme.get("title", ""),
                "description": theme.get("description", ""),
                "post_type": theme.get("post_type", "content_only"),
                "product_category": theme.get("product_category", "ocr"),
                "talking_points": theme.get("talking_points", []),
                "status": "planned"
            })

        return schedule
273
+
backend/app/services/ai_service.py CHANGED
@@ -9,8 +9,13 @@ class AIService:
9
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
10
  self.model = os.getenv("OPENAI_MODEL", "gpt-4o")
11
 
12
- async def generate_content(self, request: AIContentRequest, assets_context: Optional[str] = None) -> AIContentResponse:
13
- """Generate LinkedIn post content using GPT"""
 
 
 
 
 
14
 
15
  product_descriptions = {
16
  "ocr": "Intelligent Document Parsing (OCR) - AI-powered document processing and data extraction",
@@ -25,9 +30,24 @@ class AIService:
25
  "webinar": "A webinar invitation post to promote an upcoming event"
26
  }
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  system_prompt = f"""You are an expert LinkedIn content creator specializing in B2B SaaS marketing.
29
  Create engaging, professional LinkedIn posts that:
30
  - Are authentic and valuable to the audience
 
31
  - Include relevant hashtags (3-5 hashtags)
32
  - Use emojis sparingly and appropriately
33
  - Are optimized for engagement
@@ -41,9 +61,10 @@ Post Type: {post_type_descriptions.get(request.post_type, request.post_type)}
41
  Post type: {post_type_descriptions.get(request.post_type, request.post_type)}
42
 
43
  {f'Additional context: {request.context}' if request.context else ''}
44
- {f'Available assets: {assets_context}' if assets_context else ''}
45
 
46
- Make it engaging, professional, and include relevant hashtags at the end."""
 
47
 
48
  try:
49
  response = self.client.chat.completions.create(
 
9
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
10
  self.model = os.getenv("OPENAI_MODEL", "gpt-4o")
11
 
12
+ async def generate_content(
13
+ self,
14
+ request: AIContentRequest,
15
+ assets_context: Optional[str] = None,
16
+ asset_insights: Optional[List[Dict[str, Any]]] = None
17
+ ) -> AIContentResponse:
18
+ """Generate LinkedIn post content using GPT with agentic context from assets"""
19
 
20
  product_descriptions = {
21
  "ocr": "Intelligent Document Parsing (OCR) - AI-powered document processing and data extraction",
 
30
  "webinar": "A webinar invitation post to promote an upcoming event"
31
  }
32
 
33
+ # Build rich context from analyzed assets
34
+ asset_context_text = ""
35
+ if asset_insights:
36
+ asset_context_text = "\n\nRelevant Asset Insights (use these to create authentic, specific content):\n"
37
+ for asset in asset_insights:
38
+ if asset.get("extracted_content"):
39
+ from app.services.asset_analyzer import AssetAnalyzer
40
+ analyzer = AssetAnalyzer()
41
+ insight = analyzer.extract_key_insights(asset.get("extracted_content"))
42
+ if insight:
43
+ asset_context_text += f"- {asset.get('name', 'Asset')}: {insight}\n"
44
+ elif assets_context:
45
+ asset_context_text = f"\n\nAvailable assets: {assets_context}"
46
+
47
  system_prompt = f"""You are an expert LinkedIn content creator specializing in B2B SaaS marketing.
48
  Create engaging, professional LinkedIn posts that:
49
  - Are authentic and valuable to the audience
50
+ - Use specific insights from uploaded assets when available
51
  - Include relevant hashtags (3-5 hashtags)
52
  - Use emojis sparingly and appropriately
53
  - Are optimized for engagement
 
61
  Post type: {post_type_descriptions.get(request.post_type, request.post_type)}
62
 
63
  {f'Additional context: {request.context}' if request.context else ''}
64
+ {asset_context_text}
65
 
66
+ Make it engaging, professional, and include relevant hashtags at the end.
67
+ If asset insights are provided, incorporate specific details from them to make the content more authentic and valuable."""
68
 
69
  try:
70
  response = self.client.chat.completions.create(
backend/app/services/asset_analyzer.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx
2
+ import os
3
+ from typing import Dict, Any, Optional
4
+ from pathlib import Path
5
+
6
class AssetAnalyzer:
    """Service to analyze uploaded assets using the OCR API and extract content.

    Configuration is read from the environment when an instance is created:

    * ``OCR_API_URL`` - base URL of the OCR service (a default is used when
      the variable is unset or empty).
    * ``OCR_API_KEY`` - sent as the ``X-API-Key`` header when non-empty.
    """

    _DEFAULT_OCR_API_URL = "https://seth0330-ezofisocr.hf.space"

    # Extension -> coarse category; only "document" and "image" are sent to OCR.
    _DOCUMENT_EXTENSIONS = frozenset({'.pdf', '.doc', '.docx', '.txt', '.rtf'})
    _IMAGE_EXTENSIONS = frozenset({'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg'})
    _VIDEO_EXTENSIONS = frozenset({'.mp4', '.avi', '.mov', '.wmv', '.flv'})

    # MIME type for every extension that _get_file_type accepts for OCR, so an
    # accepted upload is never labelled as a generic binary blob.
    _CONTENT_TYPES = {
        '.pdf': 'application/pdf',
        '.doc': 'application/msword',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.txt': 'text/plain',
        '.rtf': 'application/rtf',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.bmp': 'image/bmp',
        '.webp': 'image/webp',
        '.svg': 'image/svg+xml',
    }

    # Keys of the OCR payload that are not reported as "key: value" insights
    # ('raw_text' is summarized separately).
    _SKIPPED_INSIGHT_KEYS = frozenset({'raw_text', 'confidence', 'metadata'})
    _MAX_LIST_ITEMS = 5
    _MAX_RAW_TEXT_CHARS = 500
    _NO_INSIGHTS = "No specific insights extracted"

    def __init__(self):
        # An empty OCR_API_URL falls back to the default, and a trailing slash
        # is dropped so f"{url}/api/extract" never contains a double slash.
        base_url = os.getenv("OCR_API_URL") or self._DEFAULT_OCR_API_URL
        self.ocr_api_url = base_url.rstrip("/")
        self.ocr_api_key = os.getenv("OCR_API_KEY", "")

    async def analyze_document(self, file_path: str, key_fields: Optional[str] = None) -> Dict[str, Any]:
        """
        Analyze a document using the OCR API.

        The file is uploaded as multipart form data to ``{ocr_api_url}/api/extract``.
        This method never raises: any failure is reported in the returned dict.

        Args:
            file_path: Path to the file to analyze
            key_fields: Optional comma-separated string of key fields to extract

        Returns:
            Dictionary with a boolean ``success`` and ``extracted_content``
            (the decoded JSON response, or None), plus either ``message``
            (on success / skipped file types) or ``error`` (on failure).
        """
        try:
            file_path_obj = Path(file_path)
            if not file_path_obj.exists():
                return {
                    "success": False,
                    "error": "File not found",
                    "extracted_content": None
                }

            # Only documents and images are worth sending to the OCR service.
            file_type = self._get_file_type(file_path)
            if file_type not in ("document", "image"):
                return {
                    "success": True,
                    "extracted_content": None,
                    "message": f"File type {file_type} not suitable for OCR analysis"
                }

            data = {'key_fields': key_fields} if key_fields else {}
            headers = {"X-API-Key": self.ocr_api_key} if self.ocr_api_key else {}

            # Keep the file open for the whole request: the client reads it
            # while building the multipart body.
            with open(file_path, 'rb') as f:
                files = {'file': (file_path_obj.name, f, self._get_content_type(file_path))}
                async with httpx.AsyncClient(timeout=60.0) as client:
                    response = await client.post(
                        f"{self.ocr_api_url}/api/extract",
                        headers=headers,
                        files=files,
                        data=data
                    )

            if response.status_code == 200:
                return {
                    "success": True,
                    "extracted_content": response.json(),
                    "message": "Document analyzed successfully"
                }
            return {
                "success": False,
                "error": f"OCR API returned status {response.status_code}: {response.text}",
                "extracted_content": None
            }
        except Exception as e:
            # Deliberate best-effort boundary: callers receive an error dict
            # instead of an exception. Exceptions raised without a message
            # stringify to "", so fall back to the exception class name to
            # keep the error field informative.
            return {
                "success": False,
                "error": str(e) or type(e).__name__,
                "extracted_content": None
            }

    async def analyze_image(self, file_path: str) -> Dict[str, Any]:
        """
        Analyze an image using GPT-4 Vision (for screenshots, infographics, etc.)
        This is a placeholder for future implementation; ``file_path`` is not
        read yet.

        Args:
            file_path: Path to the image file

        Returns:
            Dictionary containing a fixed placeholder analysis
        """
        # TODO: Implement GPT-4 Vision analysis for images
        # For now, return a placeholder
        return {
            "success": True,
            "extracted_content": {
                "type": "image",
                "message": "Image analysis not yet implemented"
            },
            "message": "Image analysis placeholder"
        }

    def _get_file_type(self, file_path: str) -> str:
        """Classify a path as "document", "image", "video" or "unknown" by its
        (case-insensitive) extension."""
        ext = Path(file_path).suffix.lower()
        if ext in self._DOCUMENT_EXTENSIONS:
            return "document"
        if ext in self._IMAGE_EXTENSIONS:
            return "image"
        if ext in self._VIDEO_EXTENSIONS:
            return "video"
        return "unknown"

    def _get_content_type(self, file_path: str) -> str:
        """Return the MIME type for the path's extension, or
        ``application/octet-stream`` when the extension is not recognized."""
        ext = Path(file_path).suffix.lower()
        return self._CONTENT_TYPES.get(ext, 'application/octet-stream')

    def extract_key_insights(self, extracted_content: Optional[Dict[str, Any]]) -> str:
        """
        Extract key insights from OCR results to use as context for AI content generation

        Args:
            extracted_content: The JSON response from OCR API

        Returns:
            An empty string when there is no content at all; otherwise a
            newline-separated list of "key: value" insights followed by a
            (possibly truncated) "Document content: ..." line, or a fixed
            "no insights" sentence when nothing usable was found.
        """
        if not extracted_content:
            return ""

        # The OCR API may return something other than a JSON object (e.g. a
        # list or a bare string); there are no named fields to report then.
        if not isinstance(extracted_content, dict):
            return self._NO_INSIGHTS

        insights = []

        # Scalar fields are reported as-is; lists are capped to their first items.
        for key, value in extracted_content.items():
            if not value or key in self._SKIPPED_INSIGHT_KEYS:
                continue
            if isinstance(value, (str, int, float)):
                insights.append(f"{key}: {value}")
            elif isinstance(value, list):
                shown = ', '.join(map(str, value[:self._MAX_LIST_ITEMS]))
                insights.append(f"{key}: {shown}")

        # Raw text goes last and is truncated so the prompt stays small.
        raw_text = extracted_content.get('raw_text')
        if isinstance(raw_text, str) and raw_text:
            if len(raw_text) > self._MAX_RAW_TEXT_CHARS:
                insights.append(f"Document content: {raw_text[:self._MAX_RAW_TEXT_CHARS]}...")
            else:
                insights.append(f"Document content: {raw_text}")

        return "\n".join(insights) if insights else self._NO_INSIGHTS
170
+