Spaces:
Sleeping
Sleeping
File size: 6,719 Bytes
29e530e 1d4ab9d 29e530e 1d4ab9d 29e530e 1d4ab9d 29e530e 1d4ab9d 29e530e 1d4ab9d 29e530e 1d4ab9d 29e530e 1d4ab9d cd135cb 1d4ab9d cd135cb 1d4ab9d cd135cb 1d4ab9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
import os
import boto3
import json
from dotenv import load_dotenv
from video_parser import VideoParser
from excel_parser import ExcelParser
import re
# Load settings from a .env file (python-dotenv) at import time, before
# NovaProAgent is constructed — presumably so the AWS credentials are present
# in the process environment; confirm against deployment.
load_dotenv()
class NovaProAgent:
    """Answer questions with the Amazon Nova Pro model through Bedrock.

    An instance is an async callable: ``await agent(question)`` routes the
    question to the video, Excel, or plain-text handler and returns a string.
    """

    # The only URL shape the video handler recognises; group 1 is the video ID.
    _YOUTUBE_URL_RE = re.compile(r'https://www\.youtube\.com/watch\?v=([\w-]+)')

    # Tried in order: a Windows drive path first, then any whitespace-free
    # token ending in .xls / .xlsx.
    _EXCEL_PATH_PATTERNS = (
        r'([A-Za-z]:\\[^\s]+\.xlsx?)',
        r'([^\s]+\.xlsx?)',
    )

    # Lower-cased openers that mark a reply as starting with preamble.
    _VERBOSE_STARTS = (
        "to answer this question",
        "based on the information",
        "according to",
        "the answer is",
        "looking at",
    )

    def __init__(self):
        """Create the Bedrock runtime client and the file parsers.

        No credentials are passed explicitly: boto3 resolves them through its
        default credential chain, which includes the ``AWS_ACCESS_KEY_ID`` and
        ``AWS_SECRET_ACCESS_KEY`` environment variables.
        """
        print("NovaProAgent initialized.")

        self.bedrock_client = boto3.client(
            service_name='bedrock-runtime',
            region_name='us-east-1'
        )
        self.model_id = "amazon.nova-pro-v1:0"
        self.content_type = "application/json"
        self.accept = "application/json"

        # Initialize parsers
        self.video_parser = VideoParser()
        self.excel_parser = ExcelParser()

    async def __call__(self, question: str) -> str:
        """Route *question* to the matching handler and return its answer.

        Any exception raised by a handler is printed and replaced by the
        fixed string ``"Unable to process request."``; nothing propagates to
        the caller.
        """
        print(f"NovaProAgent received question (first 50 chars): {question[:50]}...")
        lowered = question.lower()
        try:
            # Check if question involves video analysis
            if 'youtube.com' in question or 'video' in lowered:
                return await self._handle_video_question(question)

            # Check if question involves Excel files ('.xls' also matches
            # '.xlsx', so a single substring test covers both extensions).
            if '.xls' in question or 'excel' in lowered:
                return await self._handle_excel_question(question)

            # Regular text-based question
            return await self._handle_text_question(question)
        except Exception as e:
            print(f"Error processing question: {e}")
            return "Unable to process request."

    def _invoke_model(self, prompt: str, max_new_tokens: int) -> str:
        """Send a single-turn prompt to the model and return its raw text reply.

        NOTE: ``invoke_model`` is called synchronously, so the event loop is
        blocked while the request is in flight.

        Raises:
            Whatever the Bedrock client raises, plus ``KeyError`` /
            ``IndexError`` / ``TypeError`` when the response body does not
            have the ``output.message.content[0].text`` structure.
        """
        payload = {
            "messages": [{
                "role": "user",
                "content": [{"text": prompt}]
            }],
            "inferenceConfig": {
                "max_new_tokens": max_new_tokens,
                "temperature": 0.0
            }
        }
        response = self.bedrock_client.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=json.dumps(payload)
        )
        response_body = json.loads(response['body'].read())
        return response_body['output']['message']['content'][0]['text']

    @classmethod
    def _clean_answer(cls, answer: str) -> str:
        """Trim whitespace, skip a verbose opening sentence, and cap the length."""
        answer = answer.strip()

        # When the reply opens with preamble, keep the first later sentence
        # that is longer than 10 characters.
        # NOTE(review): this drops the opening sentence entirely, which can
        # discard the answer itself (e.g. "The answer is X. ...") — confirm
        # this is the intended trade-off.
        if answer.lower().startswith(cls._VERBOSE_STARTS):
            for sentence in answer.split('. ')[1:]:
                candidate = sentence.strip()
                if len(candidate) > 10:
                    answer = candidate
                    break

        # Limit length: keep only the first sentence of an over-long answer.
        if len(answer) > 200:
            first = answer.split('. ')[0]
            # A single-sentence answer keeps its own final period; adding
            # another would produce "..".
            answer = first if first.endswith('.') else first + '.'
        return answer

    async def _handle_video_question(self, question: str) -> str:
        """Handle questions that reference a YouTube video.

        The video itself is not analysed here: the model is asked to explain
        the limitations of video analysis and to suggest alternatives.
        Returns a fixed message when no supported URL is found, and a
        fallback message naming the video ID when the model call fails.
        """
        match = self._YOUTUBE_URL_RE.search(question)
        if not match:
            return "No valid YouTube URL found in question."
        url = match.group(0)
        video_id = match.group(1)

        video_prompt = (
            f"User is asking about a YouTube video: {url}\n"
            f"Video ID: {video_id}\n"
            f"User question: {question}\n"
            "Provide a helpful response about video analysis limitations and suggest alternatives."
        )
        try:
            return self._invoke_model(video_prompt, max_new_tokens=150).strip()
        except Exception as e:
            # Best-effort path: report the failure, then fall back to a
            # static message instead of propagating.
            print(f"Error processing video question: {e}")
            return f"Video ID: {video_id}. Direct video analysis unavailable due to access restrictions."

    async def _handle_excel_question(self, question: str) -> str:
        """Handle questions that require Excel file analysis.

        The file path is taken from the question text. Questions mentioning
        both "sales" and "food" return the parser's ``total_food_sales``
        value; every other question returns the sheet's row and column
        counts. Parser failures are returned as an error string.
        """
        # Extract file path from question if present
        file_path = None
        for pattern in self._EXCEL_PATH_PATTERNS:
            match = re.search(pattern, question)
            if match:
                file_path = match.group(1)
                break
        if not file_path:
            return "Please provide Excel file path in your question."

        lowered = question.lower()
        try:
            if 'sales' in lowered and 'food' in lowered:
                results = self.excel_parser.analyze_sales_data(file_path)
                # The stored total may be numeric; callers are promised a str.
                return str(results.get('total_food_sales', 'No sales data found'))
            df = self.excel_parser.read_excel_file(file_path)
            return f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns."
        except Exception as e:
            return f"Excel analysis failed: {str(e)}"

    async def _handle_text_question(self, question: str) -> str:
        """Handle regular text-based questions.

        Asks the model for a direct, concise answer and post-processes the
        reply with ``_clean_answer``. Errors from the model call propagate to
        the caller.
        """
        # Create a more focused prompt for concise answers
        prompt = (
            "Answer this question directly and concisely. Provide only the "
            "essential information requested, not explanations or "
            "step-by-step reasoning unless specifically asked.\n"
            f"Question: {question}\n"
            "Answer:"
        )
        answer = self._invoke_model(prompt, max_new_tokens=250)
        return self._clean_answer(answer)