varun324242 commited on
Commit
eceb45a
·
verified ·
1 Parent(s): 2ba89e1

Upload 20 files

Browse files
ValidationAgent/.DS_Store ADDED
Binary file (6.15 kB). View file
 
ValidationAgent/ValidationAgent.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agency_swarm.agents import Agent
2
+ from agency_swarm.tools import CodeInterpreter
3
+ import os
4
+ import logging
5
+ from .tools.SearchAndScrape import SearchAndScrape
6
+
7
class ValidationAgent(Agent):
    """Agent that validates market research reports and fills data gaps.

    Asks an LLM to score a draft report for completeness; when sections are
    reported missing, scrapes supplementary data with SearchAndScrape, merges
    it into the report, and re-validates (bounded number of passes).
    """

    # Cap on re-validation passes so a model that keeps answering
    # "is_complete": false cannot drive validate_data into infinite recursion.
    _MAX_VALIDATION_PASSES = 2

    def __init__(self):
        super().__init__(
            name="ValidationAgent",
            description="This agent validates market research reports using AI and ensures data completeness.",
            instructions="./instructions.md",
            files_folder="./files",
            schemas_folder="./schemas",
            tools=[SearchAndScrape],
            tools_folder="./tools",
            temperature=0.3,
            model="groq/llama-3.3-70b-versatile",
            max_prompt_tokens=25000,
        )

    def validate_data(self, report_data, _depth=0):
        """Validate the report using AI and fill gaps if needed.

        Args:
            report_data: Draft report text to validate.
            _depth: Internal recursion counter; callers should not set it.

        Returns:
            The model's validation result (expected to be JSON text), or a
            dict with an "error" key if validation raised.
        """
        # NOTE: the JSON template braces are doubled ({{ }}) so the f-string
        # emits literal braces. The previous unescaped version was evaluated
        # as a dict expression and raised NameError on the bare name `boolean`.
        validation_prompt = f"""
        Analyze this market research report for quality and completeness:

        {report_data}

        Please check for:
        1. Missing key information
        2. Data accuracy and consistency
        3. Logical flow and structure
        4. Completeness of sections:
        - Market Size & Growth
        - Competitive Landscape
        - Consumer Analysis
        - Technology & Innovation
        - Future Outlook

        Provide a detailed assessment with:
        1. Quality score (0-100)
        2. List of missing or incomplete sections
        3. Specific recommendations for improvement
        4. Additional data points needed

        Format: JSON with these keys:
        {{
            "quality_score": int,
            "missing_sections": list,
            "recommendations": list,
            "additional_data_needed": list,
            "is_complete": boolean
        }}
        """

        try:
            # _get_model is provided elsewhere in the project; presumably it
            # returns the configured LLM client — TODO confirm.
            model = self._get_model()
            response = model.generate_content(validation_prompt)
            validation_result = response.text

            # If validation shows missing data, scrape for it and re-validate,
            # but only up to _MAX_VALIDATION_PASSES times.
            if (_depth < self._MAX_VALIDATION_PASSES
                    and '"is_complete": false' in validation_result.lower()):
                missing_data = self._fill_missing_data(validation_result)
                if missing_data:
                    # Combine original report with new data, then validate again.
                    updated_report = self._merge_reports(report_data, missing_data)
                    return self.validate_data(updated_report, _depth + 1)

            return validation_result

        except Exception as e:
            logging.error(f"Validation error: {str(e)}")
            return {"error": str(e)}

    def _fill_missing_data(self, validation_result):
        """Scrape data for each missing section named in *validation_result*.

        Returns a list of {"section", "content"} dicts, or None when nothing
        usable was gathered or the validation result was not valid JSON.
        """
        try:
            import json

            result = json.loads(validation_result)
            missing_sections = result.get("missing_sections", [])

            additional_data = []
            for section in missing_sections:
                # Create a section-specific search query.
                search_query = f"{section} market research data analysis"
                tool = SearchAndScrape(query=search_query)
                section_data = tool.run()
                if section_data:
                    additional_data.append({
                        "section": section,
                        "content": section_data,
                    })

            return additional_data if additional_data else None

        except Exception as e:
            logging.error(f"Error filling missing data: {str(e)}")
            return None

    def _merge_reports(self, original_report, new_data):
        """Ask the model to merge *new_data* into *original_report*.

        Falls back to returning the original report unchanged on any error.
        """
        merge_prompt = f"""
        Merge this original report with new data:

        Original Report:
        {original_report}

        New Data to Add:
        {new_data}

        Please create a cohesive, well-structured report that incorporates all information without duplication.
        Ensure proper flow and transitions between sections.
        """

        try:
            model = self._get_model()
            response = model.generate_content(merge_prompt)
            return response.text
        except Exception as e:
            logging.error(f"Error merging reports: {str(e)}")
            return original_report

    def response_validator(self, message):
        """Pass-through validator required by the Agent interface."""
        return message
ValidationAgent/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .ValidationAgent import ValidationAgent
ValidationAgent/__pycache__/ValidationAgent.cpython-311.pyc ADDED
Binary file (5.86 kB). View file
 
ValidationAgent/__pycache__/ValidationAgent.cpython-313.pyc ADDED
Binary file (1.42 kB). View file
 
ValidationAgent/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (272 Bytes). View file
 
ValidationAgent/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (249 Bytes). View file
 
ValidationAgent/instructions.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ValidationAgent Instructions
2
+
3
+ You are an agent responsible for reviewing draft reports for completeness, accuracy, and quality. Your role is to identify gaps and inconsistencies in the data and use Firecrawl to scrape additional data to fill these gaps or improve existing sections.
4
+
5
+ ### Primary Instructions:
6
+ 1. Review the draft report provided by the WebScraperAgent for completeness and accuracy.
7
+ 2. Identify any gaps or inconsistencies in the data presented in the draft report.
8
+ 3. Use Firecrawl to scrape additional data to fill identified gaps or improve existing sections of the report.
9
+ 4. Validate the newly gathered data for relevance and accuracy.
10
+ 5. Compile the validated data into a final report format.
11
+ 6. Communicate with the WebScraperAgent to address any persistent issues or discrepancies in the data.
12
+ 7. Ensure the final report is accurate, complete, and ready for submission to the MarketInsightsCEO.
ValidationAgent/tools/DataValidationTool.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agency_swarm.tools import BaseTool
2
+ from pydantic import Field
3
+ import re
4
+ from collections import defaultdict
5
+
6
class DataValidationTool(BaseTool):
    """
    This tool ensures that all data is accurate and compiles it into a cohesive final report.
    It validates numerical consistency and checks for valid date formats.
    """

    raw_data: str = Field(
        ..., description="The raw data to be validated and compiled into the final report."
    )

    expected_repetitions: dict = Field(
        default_factory=dict,
        description="A dictionary specifying numbers that are expected to repeat and their expected counts."
    )

    def run(self):
        """
        Validates the accuracy of the data and compiles it into a cohesive final report.
        """
        # Tally how often each number token occurs in the raw data.
        tallies = defaultdict(int)
        for token in re.findall(r'\b\d+\b', self.raw_data):
            tallies[token] += 1

        # Any tally that differs from its expected repetition count
        # (defaulting to exactly once) is reported as a conflict.
        conflicts = []
        for token, seen in tallies.items():
            wanted = self.expected_repetitions.get(token, 0) or 1
            if seen != wanted:
                conflicts.append(f"{token} (found {seen}, expected {wanted})")

        if conflicts:
            return f"Data validation failed: Conflicting numerical data found for numbers: {', '.join(conflicts)}."

        # Require at least one ISO-style date (YYYY-MM-DD) in the data.
        if not re.findall(r'\b\d{4}-\d{2}-\d{2}\b', self.raw_data):
            return "Data validation failed: No valid date formats found."

        # Everything checked out; wrap the raw data as the final report.
        return f"Final Report:\n\n{self.raw_data}"
ValidationAgent/tools/FirecrawlDataScraperTool.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agency_swarm.tools import BaseTool
2
+ from pydantic import Field
3
+ import requests
4
+ import os
5
+
6
+ # Assuming Firecrawl API requires an API key
7
+ firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
8
+ firecrawl_base_url = "https://api.firecrawl.com/data"
9
+
10
class FirecrawlDataScraperTool(BaseTool):
    """
    This tool interfaces with Firecrawl to gather additional data based on identified gaps or areas needing improvement in the report.
    """

    gap_description: str = Field(
        ..., description="Description of the identified gap or area needing improvement in the report."
    )

    def run(self):
        """Query the Firecrawl API for data covering the described gap.

        Always returns a human-readable string: the gathered data on success,
        or a failure description (HTTP error, timeout, or network problem).
        """
        # Prepare the request to the Firecrawl API.
        headers = {
            "Authorization": f"Bearer {firecrawl_api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "query": self.gap_description
        }

        try:
            # timeout= keeps a stalled connection from hanging the agent
            # forever (the previous call had no timeout at all).
            response = requests.post(
                firecrawl_base_url, json=payload, headers=headers, timeout=30
            )
        except requests.RequestException as e:
            # Network-level failures are reported in the same string style
            # as HTTP failures instead of propagating an exception.
            return f"Failed to gather data from Firecrawl. Request error: {e}"

        # Check if the request was successful.
        if response.status_code == 200:
            data = response.json()
            return f"Data gathered from Firecrawl: {data}"
        return f"Failed to gather data from Firecrawl. Status code: {response.status_code}, Error: {response.text}"
ValidationAgent/tools/GapIdentificationTool.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agency_swarm.tools import BaseTool
2
+ from pydantic import Field
3
+ import re
4
+
5
class GapIdentificationTool(BaseTool):
    """
    This tool analyzes the report to find any logical gaps or inconsistencies in the data or narrative.
    """

    report_content: str = Field(
        ..., description="The content of the report to be analyzed for logical gaps or inconsistencies."
    )

    def run(self):
        """Scan the report for contrast phrasing and repeated numbers."""
        # Connectives that often signal an unresolved contrast or gap.
        markers = (
            "however", "but", "although", "nevertheless",
            "in contrast", "on the other hand",
        )

        # Keep every sentence that contains at least one marker.
        potential_gaps = [
            sentence
            for sentence in re.split(r'(?<=[.!?]) +', self.report_content)
            if any(marker in sentence for marker in markers)
        ]

        # Flag the report when any number token occurs more than once
        # (a simple proxy for conflicting figures).
        inconsistencies = []
        digits = re.findall(r'\b\d+\b', self.report_content)
        if len(set(digits)) != len(digits):
            inconsistencies.append("Conflicting numerical data found.")

        # Bundle both findings and render them as a string.
        analysis_results = {
            "potential_gaps": potential_gaps,
            "inconsistencies": inconsistencies
        }
        return f"Analysis Results: {analysis_results}"
ValidationAgent/tools/ReportReviewTool.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agency_swarm.tools import BaseTool
2
+ from pydantic import Field
3
+ import re
4
+
5
class ReportReviewTool(BaseTool):
    """
    This tool analyzes the content of draft reports, checking for completeness, accuracy, and quality.
    It identifies any missing sections or errors in the report.
    """

    report_content: str = Field(
        ..., description="The content of the draft report to be analyzed."
    )

    def run(self):
        """Check the draft for missing sections, doubled words, and length."""
        # Section headings every report is expected to contain.
        required = (
            "Introduction", "Methodology", "Results", "Discussion", "Conclusion"
        )

        # Headings that never appear anywhere in the text are missing.
        missing_sections = [
            name for name in required if name not in self.report_content
        ]

        # Doubled words ("the the") serve as a cheap proxy for typos.
        errors = re.findall(r'\b(\w+)\s+\1\b', self.report_content)

        # Very short drafts are flagged as a quality concern.
        quality_issues = []
        word_count = len(self.report_content.split())
        if word_count < 500:
            quality_issues.append("The report is too short, consider adding more content.")

        # Bundle all findings and render them as a string.
        analysis_results = {
            "missing_sections": missing_sections,
            "errors": errors,
            "quality_issues": quality_issues
        }
        return f"Analysis Results: {analysis_results}"
ValidationAgent/tools/SearchAndScrape.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agency_swarm.tools import BaseTool
2
+ from pydantic import Field
3
+ import logging
4
+ import os
5
+ import time
6
+
7
try:
    from firecrawl import FirecrawlApp
except ImportError:
    raise ImportError(
        "Required packages not found. Please install them using:\n"
        "pip install firecrawl"
    )

# Initialize Firecrawl. The API key is read from the environment instead of
# being hard-coded: the previous revision embedded a live key in source,
# which is a credential leak — rotate that key.
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")
if not FIRECRAWL_API_KEY:
    logging.warning("FIRECRAWL_API_KEY is not set; Firecrawl requests will fail.")
firecrawl_app = FirecrawlApp(api_key=FIRECRAWL_API_KEY)

class SearchAndScrape(BaseTool):
    """
    This tool scrapes content using Firecrawl based on a provided query.
    """

    query: str = Field(
        ...,
        description="The search query to look for",
        examples=["market trends in technology 2024", "AI industry statistics"]
    )

    def scrape_with_retry(self, url, max_retries=3):
        """Scrape *url* via Firecrawl with retry logic.

        Returns the page's markdown when it is longer than 200 characters
        (after stripping), otherwise None. Known-problematic domains and
        PDF links are skipped outright.
        """
        problematic_domains = [
            'sparktoro.com', 'j-jdis.com', 'linkedin.com',
            'facebook.com', 'twitter.com', 'reddit.com', '.pdf'
        ]

        if any(domain in url.lower() for domain in problematic_domains):
            logging.info(f"Skipping problematic URL: {url}")
            return None

        for attempt in range(max_retries):
            try:
                response = firecrawl_app.scrape_url(
                    url=url,
                    params={'formats': ['markdown']}
                )

                # Fetch the markdown once instead of calling .get twice.
                content = response.get('markdown') if response else None
                if content and len(content.strip()) > 200:
                    return content
                return None
            except Exception as e:
                logging.error(f"Attempt {attempt + 1} failed for {url}: {str(e)}")
                if attempt < max_retries - 1:
                    time.sleep(2)  # brief back-off before retrying
                    continue
        return None

    def run(self):
        """Scrape content for the query, returning the first usable page."""
        logging.info(f"Scraping content for query: {self.query}")
        # TODO: replace these placeholder URLs with a real search step that
        # derives candidate pages from self.query.
        search_results = ["http://example.com/article1", "http://example.com/article2"]  # Placeholder URLs

        if not search_results:
            return "No search results found."

        for url in search_results:
            logging.info(f"Attempting to scrape URL: {url}")
            content = self.scrape_with_retry(url)
            if content:
                logging.info(f"Successfully scraped content from {url}")
                return f"Content from {url}:\n\n{content}"

        return "Failed to scrape content from any of the search results"
ValidationAgent/tools/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .SearchAndScrape import SearchAndScrape
ValidationAgent/tools/__pycache__/DataValidationTool.cpython-311.pyc ADDED
Binary file (2.72 kB). View file
 
ValidationAgent/tools/__pycache__/FirecrawlDataScraperTool.cpython-311.pyc ADDED
Binary file (2.09 kB). View file
 
ValidationAgent/tools/__pycache__/GapIdentificationTool.cpython-311.pyc ADDED
Binary file (2.38 kB). View file
 
ValidationAgent/tools/__pycache__/ReportReviewTool.cpython-311.pyc ADDED
Binary file (2.23 kB). View file
 
ValidationAgent/tools/__pycache__/SearchAndScrape.cpython-311.pyc ADDED
Binary file (4.09 kB). View file
 
ValidationAgent/tools/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (278 Bytes). View file