File size: 13,615 Bytes
c8f8b24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
import os
import pandas as pd
from typing import List, Dict, Tuple
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

class DocumentProcessor:
    """Prepare the embedded demo documents for the RAG system.

    The processor owns a small hard-coded corpus (``embedded_content``), keyed
    by content type, and turns each entry into chunked ``Document`` objects
    using a ``RecursiveCharacterTextSplitter``.
    """

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        """Create the text splitter and load the embedded corpus.

        Args:
            chunk_size: ``chunk_size`` passed to the splitter; lengths are
                measured with ``len``.
            chunk_overlap: ``chunk_overlap`` passed to the splitter.
        """
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
        )

        # Embedded document content for demonstration.
        # Each entry maps a content type to {"content": <text>, "metadata": <dict>}.
        # The text is runtime data fed to the splitter: do not re-indent or
        # strip it without expecting different chunk boundaries.
        self.embedded_content = {
            "financial_reports": {
                "content": """
                FinSolve Technologies Financial Performance 2024:
                
                Q4 2024 Financial Highlights:
                - Revenue: $2.6 billion (35% YoY growth)
                - Gross Margin: 64% (improvement from 58% in Q1)
                - Net Income: $325 million (18% YoY increase)
                - Operating Income: $650 million
                - Marketing Spend: $650 million for Q4
                
                Annual 2024 Summary:
                - Total Revenue: $9.4 billion (28% YoY increase)
                - Marketing Investment: $2.3 billion
                - Vendor Costs: $510 million
                - Cash Flow from Operations: $1.5 billion (14% YoY increase)
                
                Key Cost Drivers:
                - Vendor Services: $30M (18% increase)
                - Software Subscriptions: $25M (22% increase)
                - Employee Benefits: Increased 10% YoY
                
                Growth Metrics:
                - Customer Acquisition: 20% increase
                - Market Expansion: Successful entry into Europe and Southeast Asia
                - ROI: Marketing campaigns achieved 4.5x return on investment
                """,
                "metadata": {
                    "title": "Financial Reports 2024",
                    "department": "Finance",
                    "type": "Financial Report",
                    "content_type": "financial_reports"
                }
            },
            
            "marketing_reports": {
                "content": """
                FinSolve Technologies Marketing Performance 2024:
                
                Campaign Highlights:
                - New Customer Acquisition: 220,000 (Q4) - exceeded target
                - Digital Campaign ROI: 3.5x return on $5M investment
                - Brand Awareness: 15% growth YoY
                - Customer Engagement: 5% increase
                
                Geographic Expansion:
                - Europe: Successful market entry in UK, Germany, France
                - Southeast Asia: Strong performance in Indonesia, Thailand, Vietnam
                - Latin America: Expanded into Brazil, Mexico, Colombia
                
                Q4 2024 Specific Results:
                - Revenue Target: $11 million (achieved)
                - Marketing Spend: $2.5 million
                - Conversion Rate: 15.0% (target met)
                - Customer Retention: 85%
                
                Key Marketing Channels:
                - Digital Advertising: 40% of budget
                - Influencer Partnerships: Generated 600,000 impressions
                - Email Marketing: 25% open rate, 15% click-through rate
                - Event Marketing: 300 new enterprise leads from events
                
                Marketing Technology:
                - InstantPay feature launch: 52,000 sign-ups
                - Loyalty program: 50,000 enrolled customers
                - Social media: 25% engagement rate achieved
                """,
                "metadata": {
                    "title": "Marketing Reports 2024",
                    "department": "Marketing",
                    "type": "Marketing Report",
                    "content_type": "marketing_reports"
                }
            },
            
            # NOTE(review): the "Employees:" line in the text below reports
            # user/client counts rather than headcount - confirm with the
            # data owner before relying on it.
            "employee_data": {
                "content": """
                FinSolve Technologies Employee Information:
                
                Company Overview:
                - Founded: 2018
                - Headquarters: Bangalore, India
                - Global Operations: North America, Europe, Asia-Pacific
                - Employees: 2 million+ individual users, 10,000+ business clients served
                
                Employee Benefits:
                - Health Insurance: Family floater policy
                - Provident Fund: 12% employer & employee contribution
                - Maternity Benefit: 26 weeks paid leave
                - Flexible Work: Remote work and flexible hours available
                - Professional Development: Access to online learning platforms
                
                Leave Policies:
                - Annual Leave: 15-21 days/year
                - Sick Leave: 12 days/year
                - Casual Leave: 7 days/year
                - Maternity Leave: 26 weeks
                - Paternity Leave: 7-15 days
                
                Work Hours:
                - Standard: 9 hours/day (including 1 hour break)
                - Flexible timings available for eligible roles
                - Attendance tracking via biometric/HRMS app
                
                Performance Management:
                - Annual and mid-year reviews
                - Based on KPIs, goals, competencies
                - Regular 1:1 meetings with managers
                - Recognition and rewards program
                
                Compensation Structure:
                - Basic Salary: 40-50% of CTC
                - HRA: 40-50% of basic salary
                - Annual bonus: Minimum 8.33% of basic salary
                - Performance-based increments
                """,
                "metadata": {
                    "title": "Employee Handbook & HR Data",
                    "department": "HR",
                    "type": "HR Policy",
                    "content_type": "employee_data"
                }
            },
            
            "technical_docs": {
                "content": """
                FinSolve Technologies Engineering Architecture:
                
                System Architecture:
                - Microservices-based, cloud-native system
                - Designed for scalability, resilience, and security
                - Modular design supporting rapid feature development
                
                Technology Stack:
                Frontend:
                - React 18, Redux Toolkit, Tailwind CSS
                - TypeScript, React Query, D3.js
                - Mobile: Swift 5.5 (iOS), Kotlin 1.6 (Android)
                
                Backend:
                - Node.js 18 LTS, Python 3.11 (FastAPI), Go 1.19
                - Express.js, Pydantic, Gin
                - APIs: REST, GraphQL, gRPC
                
                Database:
                - PostgreSQL 15 (primary relational database)
                - MongoDB 6.0 (user profiles, metadata)
                - Redis 7.0 (caching, session management)
                - Amazon S3 (documents, backups)
                
                Infrastructure:
                - AWS (primary cloud provider)
                - Kubernetes 1.25+ (container orchestration)
                - Terraform (Infrastructure as Code)
                - Docker containers with security scanning
                
                Development Process:
                - Agile methodology with 2-week sprints
                - Git workflow with feature branches
                - CI/CD pipeline using Jenkins/GitHub Actions
                - Code review requirements: 2 approvals minimum
                
                Security:
                - OAuth 2.0, JWT tokens
                - TLS 1.3 for all communications
                - AES-256 encryption for data at rest
                - Regular security audits and penetration testing
                
                Performance Targets:
                - API response time: P95 < 200ms
                - Uptime: 99.99%
                - Page load time: < 2 seconds
                """,
                "metadata": {
                    "title": "Engineering Master Document",
                    "department": "Engineering",
                    "type": "Technical Documentation",
                    "content_type": "technical_docs"
                }
            },
            
            "general_policies": {
                "content": """
                FinSolve Technologies General Company Information:
                
                Company Mission:
                "To empower financial freedom through secure, scalable, and innovative technology solutions."
                
                Core Values:
                - Integrity: Act with honesty and transparency
                - Respect: Value diversity and treat everyone with dignity
                - Innovation: Encourage creativity and continuous improvement
                - Customer Focus: Customers at the heart of everything we do
                - Accountability: Take responsibility for actions and results
                
                General Policies:
                - Code of Conduct: Professional behavior and respect for all
                - Anti-Discrimination: Equal opportunity regardless of background
                - Work from Home: Up to 2 days/week for eligible roles
                - Dress Code: Business casual Monday-Thursday, smart casual Friday
                
                Employee Services:
                - Employee Assistance Program (EAP)
                - Mental health support and counseling
                - Wellness programs and health check-ups
                - Team outings and social activities
                
                Communication:
                - Internal communications through official channels
                - Quarterly all-hands meetings
                - Regular newsletter updates
                - Open door policy for feedback
                
                Training & Development:
                - Mandatory induction training for new hires
                - Technical and soft skills workshops
                - Certification reimbursement up to ₹50,000/year
                - Internal job postings for career growth
                """,
                "metadata": {
                    "title": "General Company Policies",
                    "department": "General",
                    "type": "Policy Document",
                    "content_type": "general_policies"
                }
            }
        }

    def _build_chunks(self, content_data: Dict, **extra_metadata) -> List[Document]:
        """Split one embedded entry into chunks tagged with their position.

        Args:
            content_data: An ``embedded_content`` entry holding ``"content"``
                and ``"metadata"`` keys.
            **extra_metadata: Extra metadata merged over the entry's own
                metadata (for example ``accessible_to``).

        Returns:
            The chunks produced by ``self.text_splitter``. Each chunk's
            metadata gains ``chunk_id`` (0-based position) and
            ``total_chunks``.
        """
        doc = Document(
            page_content=content_data["content"],
            # Always a fresh dict, so the Document never aliases the metadata
            # stored in ``embedded_content``.
            metadata={**content_data["metadata"], **extra_metadata},
        )

        chunks = self.text_splitter.split_documents([doc])

        total = len(chunks)
        for index, chunk in enumerate(chunks):
            chunk.metadata['chunk_id'] = index
            chunk.metadata['total_chunks'] = total

        return chunks

    def get_documents_for_role(self, role: str) -> List[Document]:
        """Return the chunked documents that *role* is allowed to read.

        The accessible content types come from
        ``AuthSystem.get_accessible_documents(role)``; types without an
        embedded entry are skipped. Every returned chunk carries
        ``accessible_to=role`` in its metadata.
        """
        # Kept as a function-level import, as in the original code
        # (presumably to avoid a circular import - confirm).
        from auth_system import AuthSystem

        accessible_docs = AuthSystem().get_accessible_documents(role)

        documents = []
        for content_type in accessible_docs:
            content_data = self.embedded_content.get(content_type)
            if content_data is None:
                continue
            documents.extend(self._build_chunks(content_data, accessible_to=role))

        return documents

    def get_all_documents(self) -> List[Document]:
        """Return the chunks of every embedded document, in corpus order."""
        all_documents = []
        for content_data in self.embedded_content.values():
            all_documents.extend(self._build_chunks(content_data))
        return all_documents

    def get_document_info(self) -> Dict:
        """Return a mapping of content type to a copy of its metadata.

        Copies are returned so callers cannot modify the stored corpus
        metadata by accident.
        """
        return {
            content_type: dict(content_data["metadata"])
            for content_type, content_data in self.embedded_content.items()
        }

    def search_content(self, query: str, role: str, limit: int = 5) -> List[Document]:
        """Keyword-search the documents visible to *role*.

        Matching is a case-insensitive substring test of each
        whitespace-separated query term against the chunk text. Chunks are
        ranked by how many distinct terms they contain; ties keep their
        original document order.

        Args:
            query: Free-text query. A blank query matches nothing.
            role: Role whose accessible documents are searched.
            limit: Maximum number of chunks to return.

        Returns:
            At most ``limit`` matching chunks, best match first.
        """
        terms = set(query.lower().split())
        if not terms or limit <= 0:
            return []

        scored = []
        for doc in self.get_documents_for_role(role):
            text = doc.page_content.lower()
            hits = sum(1 for term in terms if term in text)
            if hits:
                scored.append((hits, doc))

        # list.sort is stable (also with reverse=True), so equally scored
        # chunks stay in document order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [doc for _, doc in scored[:limit]]