Spaces:

Seth0330
/

PostGen

Sleeping

App Files Files Community

Seth committed on Jan 4

Commit

6d1e595

1 Parent(s): b3f7679

update

Browse files

Files changed (8) hide show

README.md +8 -2
backend/app/database.py +50 -1
backend/app/main.py +178 -23
backend/app/models.py +3 -0
backend/app/schemas.py +3 -0
backend/app/services/agentic_planner.py +273 -0
backend/app/services/ai_service.py +25 -4
backend/app/services/asset_analyzer.py +170 -0

README.md CHANGED Viewed

@@ -13,10 +13,12 @@ PostGen is a comprehensive LinkedIn content scheduling application that integrat
 ## Features
-- **AI Content Generation**: Uses GPT to generate engaging LinkedIn posts
 - **Canva Integration**: Access and apply Canva brand templates using the Autofill API
 - **LinkedIn Scheduling**: Schedule and publish posts directly to LinkedIn
-- **Asset Repository**: Upload and organize marketing materials by product categories
 - **Smart Scheduler**: Agentic AI automatically generates content schedules based on date ranges, products, and post types
 - **Product Categories**: Support for OCR, P2P, and O2C products with sub-categories
@@ -52,6 +54,10 @@ Create a `.env` file in the backend directory with the following variables:
 OPENAI_API_KEY=your_openai_api_key
 OPENAI_MODEL=gpt-4o
 # Canva (optional - can be passed via API)
 CANVA_ACCESS_TOKEN=your_canva_access_token

 ## Features
+- **Agentic AI System**: Multi-step AI planning that analyzes assets, extracts insights, and generates context-aware content
+- **Document Parsing**: Automatic OCR analysis of uploaded documents using integrated OCR API
+- **AI Content Generation**: Uses GPT with extracted asset insights to generate engaging, authentic LinkedIn posts
 - **Canva Integration**: Access and apply Canva brand templates using the Autofill API
 - **LinkedIn Scheduling**: Schedule and publish posts directly to LinkedIn
+- **Asset Repository**: Upload and organize marketing materials by product categories with automatic content extraction
 - **Smart Scheduler**: Agentic AI automatically generates content schedules based on date ranges, products, and post types
 - **Product Categories**: Support for OCR, P2P, and O2C products with sub-categories
 OPENAI_API_KEY=your_openai_api_key
 OPENAI_MODEL=gpt-4o
+# OCR API (for document parsing and asset analysis)
+OCR_API_URL=https://seth0330-ezofisocr.hf.space
+OCR_API_KEY=your_ocr_api_key
 # Canva (optional - can be passed via API)
 CANVA_ACCESS_TOKEN=your_canva_access_token

backend/app/database.py CHANGED Viewed

@@ -253,6 +253,9 @@ def init_db():
                             sub_category VARCHAR,
                             size INTEGER,
                             extra_metadata JSONB,
                             created_at TIMESTAMP DEFAULT NOW()
                         )""",
                         """CREATE TABLE IF NOT EXISTS posts (
@@ -288,7 +291,53 @@ def init_db():
                     for sql in tables_sql:
                         cursor.execute(sql)
                     conn.commit()
-                    cursor.close()
                     conn.close()
                     print("✓ CockroachDB tables created successfully (using direct psycopg2 connection)")
                     return True

                             sub_category VARCHAR,
                             size INTEGER,
                             extra_metadata JSONB,
+                            extracted_content JSONB,
+                            analysis_status VARCHAR DEFAULT 'pending',
+                            analyzed_at TIMESTAMP,
                             created_at TIMESTAMP DEFAULT NOW()
                         )""",
                         """CREATE TABLE IF NOT EXISTS posts (
                     for sql in tables_sql:
                         cursor.execute(sql)
                     conn.commit()
+                    # Add new columns to assets table if they don't exist (migration)
+                    try:
+                        cursor = conn.cursor()
+                        # Check and add extracted_content column
+                        cursor.execute("""
+                            DO $$
+                            BEGIN
+                                IF NOT EXISTS (
+                                    SELECT 1 FROM information_schema.columns
+                                    WHERE table_name='assets' AND column_name='extracted_content'
+                                ) THEN
+                                    ALTER TABLE assets ADD COLUMN extracted_content JSONB;
+                                END IF;
+                            END $$;
+                        """)
+                        # Check and add analysis_status column
+                        cursor.execute("""
+                            DO $$
+                            BEGIN
+                                IF NOT EXISTS (
+                                    SELECT 1 FROM information_schema.columns
+                                    WHERE table_name='assets' AND column_name='analysis_status'
+                                ) THEN
+                                    ALTER TABLE assets ADD COLUMN analysis_status VARCHAR DEFAULT 'pending';
+                                END IF;
+                            END $$;
+                        """)
+                        # Check and add analyzed_at column
+                        cursor.execute("""
+                            DO $$
+                            BEGIN
+                                IF NOT EXISTS (
+                                    SELECT 1 FROM information_schema.columns
+                                    WHERE table_name='assets' AND column_name='analyzed_at'
+                                ) THEN
+                                    ALTER TABLE assets ADD COLUMN analyzed_at TIMESTAMP;
+                                END IF;
+                            END $$;
+                        """)
+                        conn.commit()
+                        cursor.close()
+                        print("✓ Database migration completed (added new asset columns)")
+                    except Exception as migration_error:
+                        # Migration might fail if columns already exist, that's okay
+                        print(f"Migration note: {migration_error}")
                     conn.close()
                     print("✓ CockroachDB tables created successfully (using direct psycopg2 connection)")
                     return True

backend/app/main.py CHANGED Viewed

@@ -16,6 +16,8 @@ from app.schemas import (
 from app.services.canva_service import CanvaService
 from app.services.linkedin_service import LinkedInService
 from app.services.ai_service import AIService
 from app.database import init_db, get_db, get_direct_psycopg2_connection, ensure_default_user
 from sqlalchemy.orm import Session
@@ -54,6 +56,8 @@ async def startup_event():
 # Services
 ai_service = AIService()
 # ---- API Endpoints ----
@@ -135,16 +139,35 @@ async def get_linkedin_profile(access_token: str):
 # ---- AI Content Generation ----
 @app.post("/api/ai/generate-content", response_model=AIContentResponse)
-async def generate_ai_content(request: AIContentRequest):
-    """Generate LinkedIn post content using GPT"""
     try:
-        # Get assets context if provided
-        assets_context = None
         if request.assets:
-            # In a real implementation, fetch asset descriptions from database
-            assets_context = f"User has {len(request.assets)} assets available"
-        response = await ai_service.generate_content(request, assets_context)
         return response
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"AI generation failed: {str(e)}")
@@ -278,6 +301,98 @@ async def upload_asset(
                 else:
                     raise commit_error
             return {
                 "id": db_asset.id,
                 "name": db_asset.name,
@@ -285,6 +400,7 @@ async def upload_asset(
                 "product_category": db_asset.product_category,
                 "sub_category": db_asset.sub_category,
                 "size": db_asset.size,
                 "created_at": db_asset.created_at.isoformat() if hasattr(db_asset, 'created_at') else datetime.utcnow().isoformat()
             }
         except Exception as db_error:
@@ -332,6 +448,9 @@ async def get_assets(
                     "product_category": asset.product_category,
                     "sub_category": asset.sub_category,
                     "size": asset.size,
                     "created_at": asset.created_at
                 })
         except Exception as orm_error:
@@ -345,14 +464,16 @@ async def get_assets(
                         cursor = conn.cursor()
                         if product_category and product_category != "all":
                             cursor.execute("""
-                                SELECT id, name, file_path, file_type, product_category, sub_category, size, created_at
                                 FROM assets
                                 WHERE product_category = %s
                                 ORDER BY created_at DESC
                             """, (product_category,))
                         else:
                             cursor.execute("""
-                                SELECT id, name, file_path, file_type, product_category, sub_category, size, created_at
                                 FROM assets
                                 ORDER BY created_at DESC
                             """)
@@ -369,7 +490,10 @@ async def get_assets(
                                 "product_category": row[4],
                                 "sub_category": row[5],
                                 "size": row[6],
-                                "created_at": row[7]
                             })
                     except Exception as psycopg2_error:
                         print(f"Direct psycopg2 query failed: {psycopg2_error}")
@@ -549,23 +673,54 @@ async def get_posts():
 # ---- Campaign Management ----
 @app.post("/api/campaigns/generate")
-async def generate_campaign(campaign_data: dict):
     """Generate a campaign schedule using agentic AI"""
     try:
-        # This would use AI to generate a schedule based on:
-        # - Date range
-        # - Products to focus on
-        # - Post types mix
-        # - Posts per week
-        # Mock implementation
-        return {
-            "campaign_id": 1,
-            "generated_posts": 12,
-            "schedule": []
-        }
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
 # ---- Frontend static serving ----
 # Path calculation: /app/backend/app/main.py -> /app/frontend/dist

 from app.services.canva_service import CanvaService
 from app.services.linkedin_service import LinkedInService
 from app.services.ai_service import AIService
+from app.services.asset_analyzer import AssetAnalyzer
+from app.services.agentic_planner import AgenticPlanner
 from app.database import init_db, get_db, get_direct_psycopg2_connection, ensure_default_user
 from sqlalchemy.orm import Session
 # Services
 ai_service = AIService()
+asset_analyzer = AssetAnalyzer()
+agentic_planner = AgenticPlanner()
 # ---- API Endpoints ----
 # ---- AI Content Generation ----
 @app.post("/api/ai/generate-content", response_model=AIContentResponse)
async def generate_ai_content(request: AIContentRequest, db: Session = Depends(get_db)):
    """Generate LinkedIn post content using GPT with agentic asset context.

    When the request references asset ids, the matching Asset rows are loaded
    and reduced to small dicts (id, name, product category, extracted content)
    that are handed to the AI service as ``asset_insights``. A failed asset
    lookup is logged and generation continues without insights. Any other
    failure is reported to the client as HTTP 500.
    """
    try:
        insights = None
        if request.assets:
            try:
                from app.models import Asset

                # Load only the assets the caller referenced.
                matching = db.query(Asset).filter(Asset.id.in_(request.assets)).all()
                insights = [
                    {
                        "id": row.id,
                        "name": row.name,
                        "product_category": row.product_category,
                        "extracted_content": getattr(row, 'extracted_content', None),
                    }
                    for row in matching
                ]
            except Exception as db_error:
                # Best effort: fall back to generating without asset context.
                print(f"Could not fetch assets from DB: {db_error}")
                insights = None

        return await ai_service.generate_content(
            request,
            assets_context=None,
            asset_insights=insights,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"AI generation failed: {str(e)}")
                 else:
                     raise commit_error
+            # Analyze asset using OCR API (agentic step)
+            asset_id = db_asset.id
+            if file_type in ["document", "image"]:
+                # Update status to processing
+                try:
+                    conn = get_direct_psycopg2_connection()
+                    if conn:
+                        cursor = conn.cursor()
+                        cursor.execute("""
+                            UPDATE assets
+                            SET analysis_status = 'processing'
+                            WHERE id = %s
+                        """, (asset_id,))
+                        conn.commit()
+                        cursor.close()
+                        conn.close()
+                except Exception as update_error:
+                    print(f"Could not update analysis status: {update_error}")
+                # Analyze asset asynchronously (don't block response)
+                try:
+                    analysis_result = await asset_analyzer.analyze_document(str(file_path))
+                    if analysis_result.get("success") and analysis_result.get("extracted_content"):
+                        # Update asset with extracted content
+                        try:
+                            conn = get_direct_psycopg2_connection()
+                            if conn:
+                                cursor = conn.cursor()
+                                import json
+                                extracted_json = json.dumps(analysis_result["extracted_content"])
+                                cursor.execute("""
+                                    UPDATE assets
+                                    SET extracted_content = %s::jsonb,
+                                        analysis_status = 'completed',
+                                        analyzed_at = NOW()
+                                    WHERE id = %s
+                                """, (extracted_json, asset_id))
+                                conn.commit()
+                                cursor.close()
+                                conn.close()
+                                print(f"✓ Asset {asset_id} analyzed successfully")
+                        except Exception as update_error:
+                            print(f"Could not save extracted content: {update_error}")
+                            # Try to mark as failed
+                            try:
+                                conn = get_direct_psycopg2_connection()
+                                if conn:
+                                    cursor = conn.cursor()
+                                    cursor.execute("""
+                                        UPDATE assets
+                                        SET analysis_status = 'failed'
+                                        WHERE id = %s
+                                    """, (asset_id,))
+                                    conn.commit()
+                                    cursor.close()
+                                    conn.close()
+                            except:
+                                pass
+                    else:
+                        # Mark as failed if analysis didn't succeed
+                        try:
+                            conn = get_direct_psycopg2_connection()
+                            if conn:
+                                cursor = conn.cursor()
+                                cursor.execute("""
+                                    UPDATE assets
+                                    SET analysis_status = 'failed'
+                                    WHERE id = %s
+                                """, (asset_id,))
+                                conn.commit()
+                                cursor.close()
+                                conn.close()
+                        except:
+                            pass
+                except Exception as analysis_error:
+                    print(f"Asset analysis error: {analysis_error}")
+                    # Mark as failed
+                    try:
+                        conn = get_direct_psycopg2_connection()
+                        if conn:
+                            cursor = conn.cursor()
+                            cursor.execute("""
+                                UPDATE assets
+                                SET analysis_status = 'failed'
+                                WHERE id = %s
+                            """, (asset_id,))
+                            conn.commit()
+                            cursor.close()
+                            conn.close()
+                    except:
+                        pass
             return {
                 "id": db_asset.id,
                 "name": db_asset.name,
                 "product_category": db_asset.product_category,
                 "sub_category": db_asset.sub_category,
                 "size": db_asset.size,
+                "analysis_status": "processing" if file_type in ["document", "image"] else "pending",
                 "created_at": db_asset.created_at.isoformat() if hasattr(db_asset, 'created_at') else datetime.utcnow().isoformat()
             }
         except Exception as db_error:
                     "product_category": asset.product_category,
                     "sub_category": asset.sub_category,
                     "size": asset.size,
+                    "extracted_content": asset.extracted_content if hasattr(asset, 'extracted_content') else None,
+                    "analysis_status": asset.analysis_status if hasattr(asset, 'analysis_status') else None,
+                    "analyzed_at": asset.analyzed_at.isoformat() if hasattr(asset, 'analyzed_at') and asset.analyzed_at else None,
                     "created_at": asset.created_at
                 })
         except Exception as orm_error:
                         cursor = conn.cursor()
                         if product_category and product_category != "all":
                             cursor.execute("""
+                                SELECT id, name, file_path, file_type, product_category, sub_category, size,
+                                       extracted_content, analysis_status, analyzed_at, created_at
                                 FROM assets
                                 WHERE product_category = %s
                                 ORDER BY created_at DESC
                             """, (product_category,))
                         else:
                             cursor.execute("""
+                                SELECT id, name, file_path, file_type, product_category, sub_category, size,
+                                       extracted_content, analysis_status, analyzed_at, created_at
                                 FROM assets
                                 ORDER BY created_at DESC
                             """)
                                 "product_category": row[4],
                                 "sub_category": row[5],
                                 "size": row[6],
+                                "extracted_content": row[7] if len(row) > 7 else None,
+                                "analysis_status": row[8] if len(row) > 8 else None,
+                                "analyzed_at": row[9].isoformat() if len(row) > 9 and row[9] else None,
+                                "created_at": row[10] if len(row) > 10 else row[6]
                             })
                     except Exception as psycopg2_error:
                         print(f"Direct psycopg2 query failed: {psycopg2_error}")
 # ---- Campaign Management ----
 @app.post("/api/campaigns/generate")
async def generate_campaign(campaign_data: dict, db: Session = Depends(get_db)):
    """Generate a campaign schedule using agentic AI.

    Reads these keys from ``campaign_data``:

    - ``date_range_start`` / ``date_range_end``: ISO-8601 strings (a trailing
      ``Z`` is accepted). Both are required.
    - ``products``: product categories used to select assets (default ``[]``).
    - ``post_types``: post types offered to the planner (default ``[]``).
    - ``posts_per_week``: positive number of posts per week (default ``5``).

    Returns the plan produced by ``agentic_planner.plan_campaign``.

    Raises:
        HTTPException: 400 when the payload is invalid, 500 when planning
            itself fails.
    """
    from datetime import datetime
    from app.models import Asset

    def _parse_iso(field_name):
        """Parse a required ISO-8601 field, or reject the request with a 400."""
        raw = campaign_data.get(field_name)
        if not isinstance(raw, str) or not raw:
            raise HTTPException(
                status_code=400,
                detail=f"{field_name} is required and must be an ISO-8601 string",
            )
        try:
            # datetime.fromisoformat on older Pythons rejects a trailing "Z".
            return datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            raise HTTPException(
                status_code=400,
                detail=f"{field_name} is not a valid ISO-8601 datetime: {raw!r}",
            )

    try:
        # Extract and validate campaign parameters
        date_range_start = _parse_iso("date_range_start")
        date_range_end = _parse_iso("date_range_end")
        # Naive and aware datetimes cannot be compared or subtracted.
        if (date_range_start.tzinfo is None) != (date_range_end.tzinfo is None):
            raise HTTPException(
                status_code=400,
                detail="date_range_start and date_range_end must both include a timezone or both omit it",
            )
        if date_range_end < date_range_start:
            raise HTTPException(
                status_code=400,
                detail="date_range_end must not be before date_range_start",
            )

        # "or []" also covers an explicit null in the JSON payload.
        products = campaign_data.get("products") or []
        post_types = campaign_data.get("post_types") or []

        posts_per_week = campaign_data.get("posts_per_week", 5)
        if (
            isinstance(posts_per_week, bool)
            or not isinstance(posts_per_week, (int, float))
            or posts_per_week <= 0
        ):
            raise HTTPException(
                status_code=400,
                detail="posts_per_week must be a positive number",
            )

        # Fetch relevant assets for the selected products
        assets = []
        try:
            db_assets = db.query(Asset).filter(Asset.product_category.in_(products)).all()
            for asset in db_assets:
                assets.append({
                    "id": asset.id,
                    "name": asset.name,
                    "file_type": asset.file_type,
                    "product_category": asset.product_category,
                    "sub_category": asset.sub_category,
                    "extracted_content": getattr(asset, 'extracted_content', None),
                    "analysis_status": getattr(asset, 'analysis_status', None),
                })
        except Exception as asset_error:
            # Best effort: the planner can still run without assets.
            print(f"Could not fetch assets: {asset_error}")

        # Use agentic planner to generate campaign
        campaign_plan = await agentic_planner.plan_campaign(
            date_range_start=date_range_start,
            date_range_end=date_range_end,
            products=products,
            post_types=post_types,
            posts_per_week=posts_per_week,
            assets=assets
        )
        return campaign_plan
    except HTTPException:
        # Keep the validation errors above as 4xx instead of re-wrapping as 500.
        raise
    except Exception as e:
        import traceback
        print(f"Campaign generation error: {traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=f"Campaign generation failed: {str(e)}")
 # ---- Frontend static serving ----
 # Path calculation: /app/backend/app/main.py -> /app/frontend/dist

backend/app/models.py CHANGED Viewed

@@ -43,6 +43,9 @@ class Asset(Base):
     sub_category = Column(String, nullable=True)
     size = Column(Integer)  # in bytes
     extra_metadata = Column(JSON, nullable=True)  # Renamed from 'metadata' to avoid SQLAlchemy conflict
     created_at = Column(DateTime, default=datetime.utcnow)
     user = relationship("User", back_populates="assets")

     sub_category = Column(String, nullable=True)
     size = Column(Integer)  # in bytes
     extra_metadata = Column(JSON, nullable=True)  # Renamed from 'metadata' to avoid SQLAlchemy conflict
+    extracted_content = Column(JSON, nullable=True)  # OCR/extracted content from document parsing API
+    analysis_status = Column(String, default="pending")  # 'pending', 'processing', 'completed', 'failed'
+    analyzed_at = Column(DateTime, nullable=True)
     created_at = Column(DateTime, default=datetime.utcnow)
     user = relationship("User", back_populates="assets")

backend/app/schemas.py CHANGED Viewed

@@ -34,6 +34,9 @@ class AssetResponse(BaseModel):
     product_category: str
     sub_category: Optional[str] = None
     size: int
     created_at: datetime
     class Config:

     product_category: str
     sub_category: Optional[str] = None
     size: int
+    extracted_content: Optional[Dict[str, Any]] = None
+    analysis_status: Optional[str] = None
+    analyzed_at: Optional[datetime] = None
     created_at: datetime
     class Config:

backend/app/services/agentic_planner.py ADDED Viewed

	@@ -0,0 +1,273 @@

+import os
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from openai import OpenAI
+from app.services.asset_analyzer import AssetAnalyzer
class AgenticPlanner:
    """Agentic AI service for planning and generating content campaigns.

    The planner groups asset insights by product category, asks the chat
    model for a list of content themes (falling back to canned themes when
    the call fails), and spreads those themes over the requested date range.
    """

    def __init__(self):
        # Configuration comes from the environment; the model defaults to gpt-4o.
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
        self.model = os.getenv("OPENAI_MODEL", "gpt-4o")
        self.asset_analyzer = AssetAnalyzer()

    async def plan_campaign(
        self,
        date_range_start: datetime,
        date_range_end: datetime,
        products: List[str],
        post_types: List[str],
        posts_per_week: int,
        assets: Optional[List[Dict[str, Any]]] = None
    ) -> Dict[str, Any]:
        """
        Agentic planning: multi-step process to create a content campaign.

        Steps:
        1. Group the available assets by category and extract insights
        2. Derive the number of posts from the date range and weekly rate
        3. Generate content themes (AI, with a canned fallback)
        4. Lay the themes out on a posting schedule

        Returns a dict with ``campaign_id`` (always ``None`` here),
        ``generated_posts``, ``schedule``, ``asset_insights`` and
        ``content_themes``.
        """
        # Step 1: Analyze assets and extract insights
        asset_insights = await self._analyze_assets(assets or [])

        # Step 2: Calculate campaign parameters (the range is inclusive, and
        # anything shorter than a week still counts as one week)
        total_days = (date_range_end - date_range_start).days + 1
        total_weeks = max(1, total_days / 7)
        total_posts = int(posts_per_week * total_weeks)

        # Step 3: Generate content plan using AI
        content_plan = await self._generate_content_plan(
            products=products,
            post_types=post_types,
            total_posts=total_posts,
            date_range_start=date_range_start,
            date_range_end=date_range_end,
            asset_insights=asset_insights
        )

        # Step 4: Create detailed schedule
        schedule = self._create_schedule(
            content_plan=content_plan,
            date_range_start=date_range_start,
            date_range_end=date_range_end,
            posts_per_week=posts_per_week
        )

        return {
            "campaign_id": None,  # Will be set when saved to DB
            "generated_posts": len(schedule),
            "schedule": schedule,
            "asset_insights": asset_insights,
            "content_themes": content_plan.get("themes", [])
        }

    async def _analyze_assets(self, assets: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Group assets by product category and collect their key insights.

        Every asset is counted; only assets that carry ``extracted_content``
        are passed to the asset analyzer, and only non-empty insights are kept.
        """
        insights_by_category: Dict[Any, Dict[str, Any]] = {}
        total_assets = len(assets)

        for asset in assets:
            category = asset.get("product_category", "ocr")
            bucket = insights_by_category.setdefault(
                category, {"count": 0, "insights": [], "assets": []}
            )
            bucket["count"] += 1

            extracted_content = asset.get("extracted_content")
            if not extracted_content:
                continue
            insight = self.asset_analyzer.extract_key_insights(extracted_content)
            if insight:
                bucket["insights"].append(insight)
                bucket["assets"].append({
                    "id": asset.get("id"),
                    "name": asset.get("name"),
                    "insight": insight
                })

        return {
            "total_assets": total_assets,
            "by_category": insights_by_category,
            "summary": f"Analyzed {total_assets} assets across {len(insights_by_category)} product categories"
        }

    async def _generate_content_plan(
        self,
        products: List[str],
        post_types: List[str],
        total_posts: int,
        date_range_start: datetime,
        date_range_end: datetime,
        asset_insights: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Ask the chat model for a content plan.

        Returns the parsed JSON object, which must contain a ``themes`` list.
        If the request fails or the response has no such list, the failure is
        logged and the canned themes from ``_generate_fallback_themes`` are
        returned instead.
        """
        import asyncio
        import functools
        import json

        product_descriptions = {
            "ocr": "Intelligent Document Parsing (OCR) - AI-powered document processing and data extraction",
            "p2p": "Purchase To Pay (P2P) - End-to-end procurement and accounts payable automation",
            "o2c": "Order to Cash (O2C) - Complete order management and accounts receivable workflow"
        }
        post_type_descriptions = {
            "carousel": "Multi-slide carousel post with visual storytelling",
            "cover_content": "Post with cover image and engaging text content",
            "content_only": "Text-only post focused on valuable insights",
            "webinar": "Webinar invitation post to promote an upcoming event"
        }

        # Build asset context (per-category counts only)
        asset_context = ""
        if asset_insights.get("by_category"):
            asset_context = "\n\nAvailable Asset Insights:\n"
            for category, data in asset_insights["by_category"].items():
                asset_context += f"\n{product_descriptions.get(category, category)}:\n"
                asset_context += f"- {data['count']} assets available\n"
                if data.get("insights"):
                    asset_context += f"- Key insights: {len(data['insights'])} extracted\n"

        system_prompt = """You are an expert content strategist for B2B SaaS marketing on LinkedIn.
Your task is to create a comprehensive content plan that:
- Distributes content evenly across the date range
- Varies post types to maintain engagement
- Uses available assets and insights effectively
- Creates diverse, valuable content themes
- Follows LinkedIn best practices
Return a JSON structure with themes and recommended post types for each theme."""
        user_prompt = f"""Create a content plan for a LinkedIn campaign:
Products to focus on: {', '.join([product_descriptions.get(p, p) for p in products])}
Available post types: {', '.join([post_type_descriptions.get(pt, pt) for pt in post_types])}
Total posts needed: {total_posts}
Date range: {date_range_start.strftime('%Y-%m-%d')} to {date_range_end.strftime('%Y-%m-%d')}
{asset_context}
Generate {total_posts} content themes with:
- Theme title
- Brief description
- Recommended post type
- Product category
- Key talking points
Return as JSON with structure:
{{
  "themes": [
    {{
      "title": "Theme title",
      "description": "Brief description",
      "post_type": "carousel|cover_content|content_only|webinar",
      "product_category": "ocr|p2p|o2c",
      "talking_points": ["point1", "point2", "point3"]
    }}
  ]
}}"""

        try:
            # The OpenAI client used here is synchronous; run the request in
            # a worker thread so the event loop is not blocked while waiting.
            send_request = functools.partial(
                self.client.chat.completions.create,
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.8,
                max_tokens=2000,
                response_format={"type": "json_object"}
            )
            response = await asyncio.get_running_loop().run_in_executor(None, send_request)

            content_plan = json.loads(response.choices[0].message.content)
            themes = content_plan.get("themes") if isinstance(content_plan, dict) else None
            if not isinstance(themes, list):
                raise ValueError("AI response did not contain a 'themes' list")
            return content_plan
        except Exception as e:
            # Fallback: generate basic themes, but leave a trace of the failure
            print(f"AI content planning failed, using fallback themes: {e}")
            return self._generate_fallback_themes(products, post_types, total_posts)

    def _generate_fallback_themes(
        self,
        products: List[str],
        post_types: List[str],
        total_posts: int
    ) -> Dict[str, Any]:
        """Generate basic themes if AI fails.

        ``total_posts`` is split across ``products`` as evenly as possible,
        with the earlier products receiving the remainder. Themes are grouped
        by product, and titles and post types rotate within each product.
        Returns ``{"themes": []}`` when there are no products.
        """
        theme_templates = {
            "ocr": [
                "Document Automation Benefits",
                "OCR Technology Overview",
                "Efficiency Gains with Intelligent Parsing"
            ],
            "p2p": [
                "Streamline Procurement Process",
                "Accounts Payable Automation",
                "Purchase Request Workflow"
            ],
            "o2c": [
                "Order Management Best Practices",
                "Sales Order Processing",
                "Accounts Receivable Optimization"
            ]
        }
        default_templates = ["Product Feature"]

        themes: List[Dict[str, Any]] = []
        if not products:
            return {"themes": themes}

        # divmod keeps the remainder that plain floor division would drop.
        base_count, remainder = divmod(total_posts, len(products))
        for position, product in enumerate(products):
            templates = theme_templates.get(product, default_templates)
            posts_for_product = base_count + (1 if position < remainder else 0)
            for i in range(posts_for_product):
                themes.append({
                    "title": f"{templates[i % len(templates)]} - Post {i+1}",
                    "description": f"Content about {product}",
                    "post_type": post_types[i % len(post_types)] if post_types else "content_only",
                    "product_category": product,
                    "talking_points": ["Key benefit 1", "Key benefit 2", "Use case"]
                })
        return {"themes": themes}

    def _create_schedule(
        self,
        content_plan: Dict[str, Any],
        date_range_start: datetime,
        date_range_end: datetime,
        posts_per_week: int
    ) -> List[Dict[str, Any]]:
        """Create a detailed posting schedule.

        Themes are placed in order, ``posts_per_week`` per 7-day window, capped
        at one post per day. Scheduling stops at the first theme that would
        fall after ``date_range_end``. Returns an empty list when there are no
        themes or ``posts_per_week`` is not positive.
        """
        themes = [t for t in content_plan.get("themes", []) if isinstance(t, dict)]
        if not themes or posts_per_week <= 0:
            return []

        # Integer arithmetic keeps the cadence exact: 5 posts/week yields day
        # offsets 0, 1, 2, 4, 5, 7, ... instead of collapsing to daily posts.
        per_week = min(posts_per_week, 7)

        schedule = []
        for index, theme in enumerate(themes):
            post_date = date_range_start + timedelta(days=int(index * 7 // per_week))
            if post_date > date_range_end:
                break
            schedule.append({
                "date": post_date.isoformat(),
                "time": "10:00",  # Default time, can be optimized
                "theme": theme.get("title", ""),
                "description": theme.get("description", ""),
                "post_type": theme.get("post_type", "content_only"),
                "product_category": theme.get("product_category", "ocr"),
                "talking_points": theme.get("talking_points", []),
                "status": "planned"
            })
        return schedule

backend/app/services/ai_service.py CHANGED Viewed

@@ -9,8 +9,13 @@ class AIService:
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
         self.model = os.getenv("OPENAI_MODEL", "gpt-4o")
-    async def generate_content(self, request: AIContentRequest, assets_context: Optional[str] = None) -> AIContentResponse:
-        """Generate LinkedIn post content using GPT"""
         product_descriptions = {
             "ocr": "Intelligent Document Parsing (OCR) - AI-powered document processing and data extraction",
@@ -25,9 +30,24 @@ class AIService:
             "webinar": "A webinar invitation post to promote an upcoming event"
         }
         system_prompt = f"""You are an expert LinkedIn content creator specializing in B2B SaaS marketing.
 Create engaging, professional LinkedIn posts that:
 - Are authentic and valuable to the audience
 - Include relevant hashtags (3-5 hashtags)
 - Use emojis sparingly and appropriately
 - Are optimized for engagement
@@ -41,9 +61,10 @@ Post Type: {post_type_descriptions.get(request.post_type, request.post_type)}
 Post type: {post_type_descriptions.get(request.post_type, request.post_type)}
 {f'Additional context: {request.context}' if request.context else ''}
-{f'Available assets: {assets_context}' if assets_context else ''}
-Make it engaging, professional, and include relevant hashtags at the end."""
         try:
             response = self.client.chat.completions.create(

         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", ""))
         self.model = os.getenv("OPENAI_MODEL", "gpt-4o")
+    async def generate_content(
+        self,
+        request: AIContentRequest,
+        assets_context: Optional[str] = None,
+        asset_insights: Optional[List[Dict[str, Any]]] = None
+    ) -> AIContentResponse:
+        """Generate LinkedIn post content using GPT with agentic context from assets"""
         product_descriptions = {
             "ocr": "Intelligent Document Parsing (OCR) - AI-powered document processing and data extraction",
             "webinar": "A webinar invitation post to promote an upcoming event"
         }
+        # Build rich context from analyzed assets
+        asset_context_text = ""
+        if asset_insights:
+            asset_context_text = "\n\nRelevant Asset Insights (use these to create authentic, specific content):\n"
+            for asset in asset_insights:
+                if asset.get("extracted_content"):
+                    from app.services.asset_analyzer import AssetAnalyzer
+                    analyzer = AssetAnalyzer()
+                    insight = analyzer.extract_key_insights(asset.get("extracted_content"))
+                    if insight:
+                        asset_context_text += f"- {asset.get('name', 'Asset')}: {insight}\n"
+        elif assets_context:
+            asset_context_text = f"\n\nAvailable assets: {assets_context}"
         system_prompt = f"""You are an expert LinkedIn content creator specializing in B2B SaaS marketing.
 Create engaging, professional LinkedIn posts that:
 - Are authentic and valuable to the audience
+- Use specific insights from uploaded assets when available
 - Include relevant hashtags (3-5 hashtags)
 - Use emojis sparingly and appropriately
 - Are optimized for engagement
 Post type: {post_type_descriptions.get(request.post_type, request.post_type)}
 {f'Additional context: {request.context}' if request.context else ''}
+{asset_context_text}
+Make it engaging, professional, and include relevant hashtags at the end.
+If asset insights are provided, incorporate specific details from them to make the content more authentic and valuable."""
         try:
             response = self.client.chat.completions.create(

backend/app/services/asset_analyzer.py ADDED Viewed

	@@ -0,0 +1,170 @@

+import httpx
+import os
+from typing import Dict, Any, Optional
+from pathlib import Path
+class AssetAnalyzer:
+    """Service to analyze uploaded assets using OCR API and extract content"""
+    def __init__(self):
+        self.ocr_api_url = os.getenv("OCR_API_URL", "https://seth0330-ezofisocr.hf.space")
+        self.ocr_api_key = os.getenv("OCR_API_KEY", "")
+    async def analyze_document(self, file_path: str, key_fields: Optional[str] = None) -> Dict[str, Any]:
+        """
+        Analyze a document using the OCR API
+        Args:
+            file_path: Path to the file to analyze
+            key_fields: Optional comma-separated string of key fields to extract
+        Returns:
+            Dictionary containing extracted content and metadata
+        """
+        try:
+            file_path_obj = Path(file_path)
+            if not file_path_obj.exists():
+                return {
+                    "success": False,
+                    "error": "File not found",
+                    "extracted_content": None
+                }
+            # Determine if this is a document that should be analyzed
+            file_type = self._get_file_type(file_path)
+            if file_type not in ["document", "image"]:
+                return {
+                    "success": True,
+                    "extracted_content": None,
+                    "message": f"File type {file_type} not suitable for OCR analysis"
+                }
+            # Read file content
+            with open(file_path, 'rb') as f:
+                files = {'file': (file_path_obj.name, f, self._get_content_type(file_path))}
+                data = {}
+                if key_fields:
+                    data['key_fields'] = key_fields
+                headers = {}
+                if self.ocr_api_key:
+                    headers["X-API-Key"] = self.ocr_api_key
+                async with httpx.AsyncClient(timeout=60.0) as client:
+                    response = await client.post(
+                        f"{self.ocr_api_url}/api/extract",
+                        headers=headers,
+                        files=files,
+                        data=data
+                    )
+                    if response.status_code == 200:
+                        result = response.json()
+                        return {
+                            "success": True,
+                            "extracted_content": result,
+                            "message": "Document analyzed successfully"
+                        }
+                    else:
+                        return {
+                            "success": False,
+                            "error": f"OCR API returned status {response.status_code}: {response.text}",
+                            "extracted_content": None
+                        }
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "extracted_content": None
+            }
+    async def analyze_image(self, file_path: str) -> Dict[str, Any]:
+        """
+        Analyze an image using GPT-4 Vision (for screenshots, infographics, etc.)
+        This is a placeholder for future implementation
+        Args:
+            file_path: Path to the image file
+        Returns:
+            Dictionary containing image analysis
+        """
+        # TODO: Implement GPT-4 Vision analysis for images
+        # For now, return a placeholder
+        return {
+            "success": True,
+            "extracted_content": {
+                "type": "image",
+                "message": "Image analysis not yet implemented"
+            },
+            "message": "Image analysis placeholder"
+        }
+    def _get_file_type(self, file_path: str) -> str:
+        """Determine file type from extension"""
+        ext = Path(file_path).suffix.lower()
+        document_extensions = ['.pdf', '.doc', '.docx', '.txt', '.rtf']
+        image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg']
+        video_extensions = ['.mp4', '.avi', '.mov', '.wmv', '.flv']
+        if ext in document_extensions:
+            return "document"
+        elif ext in image_extensions:
+            return "image"
+        elif ext in video_extensions:
+            return "video"
+        else:
+            return "unknown"
+    def _get_content_type(self, file_path: str) -> str:
+        """Get MIME type for file"""
+        ext = Path(file_path).suffix.lower()
+        content_types = {
+            '.pdf': 'application/pdf',
+            '.doc': 'application/msword',
+            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+            '.txt': 'text/plain',
+            '.jpg': 'image/jpeg',
+            '.jpeg': 'image/jpeg',
+            '.png': 'image/png',
+            '.gif': 'image/gif',
+        }
+        return content_types.get(ext, 'application/octet-stream')
+    def extract_key_insights(self, extracted_content: Dict[str, Any]) -> str:
+        """
+        Extract key insights from OCR results to use as context for AI content generation
+        Args:
+            extracted_content: The JSON response from OCR API
+        Returns:
+            Formatted string with key insights
+        """
+        if not extracted_content:
+            return ""
+        insights = []
+        # Extract structured data if available
+        if isinstance(extracted_content, dict):
+            # Look for common fields
+            for key, value in extracted_content.items():
+                if value and key not in ['raw_text', 'confidence', 'metadata']:
+                    if isinstance(value, (str, int, float)):
+                        insights.append(f"{key}: {value}")
+                    elif isinstance(value, list) and len(value) > 0:
+                        insights.append(f"{key}: {', '.join(map(str, value[:5]))}")
+            # Extract raw text if available
+            if 'raw_text' in extracted_content:
+                raw_text = extracted_content['raw_text']
+                if isinstance(raw_text, str) and len(raw_text) > 0:
+                    # Summarize long text
+                    if len(raw_text) > 500:
+                        insights.append(f"Document content: {raw_text[:500]}...")
+                    else:
+                        insights.append(f"Document content: {raw_text}")
+        return "\n".join(insights) if insights else "No specific insights extracted"