Spaces:
Sleeping
Sleeping
File size: 6,025 Bytes
1cf66c7 62a9905 1cf66c7 e96e18c 1cf66c7 62a9905 1cf66c7 90a5b5c 1cf66c7 bc88d6e 1cf66c7 62a9905 1cf66c7 62a9905 1cf66c7 62a9905 ae3c884 62a9905 ae3c884 62a9905 ae3c884 62a9905 69e68db 62a9905 1cf66c7 62a9905 1cf66c7 62a9905 7b67661 62a9905 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import os
import boto3
import json
from dotenv import load_dotenv
from video_parser import VideoParser
from excel_parser import ExcelParser
import re
load_dotenv()
class NovaProAgent:
    """Question-answering agent backed by Amazon Nova Pro on AWS Bedrock.

    Calling an instance with a question string dispatches it to one of three
    handlers: YouTube video analysis, Excel file analysis, or a plain text
    prompt sent to the Nova Pro model. Every handler returns a string.
    """

    # Only full ``https://www.youtube.com/watch?v=<id>`` links are recognised.
    _YOUTUBE_URL_RE = re.compile(r'https://www\.youtube\.com/watch\?v=[\w-]+')

    # Tried in order: an explicit Windows drive path first, then any
    # whitespace-free token ending in ``.xls`` / ``.xlsx``.
    _EXCEL_PATH_RES = (
        re.compile(r'([A-Za-z]:\\[^\s]+\.xlsx?)'),
        re.compile(r'([^\s]+\.xlsx?)'),
    )

    # Lead-in phrases that signal a wordy model reply (matched
    # case-insensitively at the start of the answer).
    _VERBOSE_STARTS = (
        "To answer this question",
        "Based on the information",
        "According to",
        "The answer is",
        "Looking at",
    )

    # Answers longer than this are reduced to their first sentence.
    _MAX_ANSWER_CHARS = 200

    def __init__(self):
        """Create the Bedrock runtime client and the video/Excel parsers.

        Credentials are resolved by boto3's default credential chain, which
        reads ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` from the
        environment. No client is built from explicitly passed keys: the
        previous throwaway S3 client was never used and did not affect the
        Bedrock client.
        """
        print("NovaProAgent initialized.")
        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name='us-east-1'
        )
        self.model_id = "amazon.nova-pro-v1:0"
        self.content_type = "application/json"
        self.accept = "application/json"
        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Route *question* to the matching handler and return its answer.

        Args:
            question: The user's question, optionally containing a YouTube
                URL or an Excel file path.

        Returns:
            The handler's answer, or ``"Unable to process request."`` if the
            handler raised.
        """
        # Log only a preview so long questions do not flood the output.
        print(f"NovaProAgent received question (first 50 chars): {question[:50]}...")
        try:
            lowered = question.lower()
            # NOTE(review): any question mentioning "video" is routed here,
            # even without a URL, and then gets the "No valid YouTube URL"
            # reply rather than falling through to the text handler.
            if 'youtube.com' in question or 'video' in lowered:
                return await self._handle_video_question(question)
            # '.xls' is a substring of '.xlsx', so one check covers both.
            if '.xls' in question or 'excel' in lowered:
                return await self._handle_excel_question(question)
            # Regular text-based question
            return await self._handle_text_question(question)
        except Exception as e:
            # Top-level boundary: report the failure and return a fixed
            # reply instead of propagating to the caller.
            print(f"Error processing question: {e}")
            return "Unable to process request."

    async def _handle_video_question(self, question: str) -> str:
        """Handle questions that require video analysis.

        Extracts the first YouTube watch URL from *question*, downloads the
        video through ``self.video_parser`` and samples its frames.

        Returns:
            A summary with the number of analysed frames, or a message
            describing why the analysis could not be done.
        """
        url_match = self._YOUTUBE_URL_RE.search(question)
        if not url_match:
            return "No valid YouTube URL found in question."
        url = url_match.group()
        try:
            video_path = self.video_parser.download_youtube_video(url)
            try:
                frames = self.video_parser.analyze_video_frames(video_path, sample_rate=60)
            finally:
                # Run cleanup even when frame analysis fails so that a
                # successful download is never left behind.
                self.video_parser.cleanup()
            return f"Analyzed {len(frames)} frames from video. Video processing complete."
        except Exception as e:
            return f"Video analysis failed: {str(e)}"

    async def _handle_excel_question(self, question: str) -> str:
        """Handle questions that require Excel file analysis.

        The first token in *question* that looks like an ``.xls``/``.xlsx``
        path is passed to ``self.excel_parser``. Questions mentioning both
        "sales" and "food" get the parser's ``total_food_sales`` result;
        anything else gets a row/column count of the loaded sheet.

        Returns:
            The answer as a string, or a message describing the failure.
        """
        file_path = None
        for pattern in self._EXCEL_PATH_RES:
            path_match = pattern.search(question)
            if path_match:
                file_path = path_match.group(1)
                break
        if not file_path:
            return "Please provide Excel file path in your question."
        try:
            lowered = question.lower()
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                # The stored total may be numeric; this method promises str.
                return str(results.get('total_food_sales', 'No sales data found'))
            df = self.excel_parser.read_excel_file(file_path)
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
        except Exception as e:
            return f"Excel analysis failed: {str(e)}"

    async def _handle_text_question(self, question: str) -> str:
        """Handle regular text-based questions by querying Nova Pro.

        Builds a prompt asking for a concise answer, invokes the model and
        post-processes the reply with ``_clean_answer``.

        Raises:
            Exception: Anything raised by the Bedrock client or by parsing
                its response propagates to the caller (``__call__`` turns it
                into a fixed reply).
        """
        # Create a more focused prompt for concise answers
        prompt = f"""Answer this question directly and concisely. Provide only the essential information requested, not explanations or step-by-step reasoning unless specifically asked.
Question: {question}
Answer:"""
        # Prepare the request payload for Nova Pro
        # NOTE(review): confirm "max_new_tokens" against the current Nova
        # request schema; kept unchanged here.
        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": [{
                        "text": prompt
                    }]
                }
            ],
            "inferenceConfig": {
                "max_new_tokens": 250,
                "temperature": 0.0
            }
        }
        # NOTE: invoke_model is a synchronous call, so the event loop is
        # blocked until the model responds.
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload)
        )
        # Parse response
        response_body = json.loads(response['body'].read())
        answer = response_body['output']['message']['content'][0]['text']
        return self._clean_answer(answer)

    @classmethod
    def _clean_answer(cls, answer: str) -> str:
        """Trim a model reply down to a short, direct answer.

        Strips surrounding whitespace, skips past a verbose lead-in sentence
        when a later sentence of more than 10 characters exists, and reduces
        replies longer than ``_MAX_ANSWER_CHARS`` to their first sentence.
        """
        answer = answer.strip()
        # Each lead-in is checked against the current answer, so a reply
        # shortened by one phrase can still be shortened by a later one.
        for start in cls._VERBOSE_STARTS:
            if answer.lower().startswith(start.lower()):
                for sentence in answer.split('. ')[1:]:
                    if len(sentence.strip()) > 10:
                        answer = sentence.strip()
                        break
        # Limit length
        if len(answer) > cls._MAX_ANSWER_CHARS:
            first_sentence = answer.split('. ')[0]
            # Splitting on '. ' drops the period; restore it only when the
            # sentence does not already end with terminal punctuation.
            if first_sentence.endswith(('.', '!', '?')):
                answer = first_sentence
            else:
                answer = first_sentence + '.'
        return answer