Maga222006 committed on
Commit ·
5cf48c0
1
Parent(s): c3adf17
MultiagentPersonalAssistant
Browse files
- .idea/MultiagentPersonalAssistant.iml +1 -1
- .idea/misc.xml +1 -1
- agent/__pycache__/file_preprocessing.cpython-312.pyc +0 -0
- agent/__pycache__/models.cpython-312.pyc +0 -0
- agent/file_preprocessing.py +18 -48
- agent/models.py +4 -0
- app.py +15 -6
.idea/MultiagentPersonalAssistant.iml
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
<module type="PYTHON_MODULE" version="4">
|
| 3 |
<component name="NewModuleRootManager">
|
| 4 |
<content url="file://$MODULE_DIR$" />
|
| 5 |
-
<orderEntry type="jdk" jdkName="
|
| 6 |
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
</component>
|
| 8 |
<component name="PyDocumentationSettings">
|
|
|
|
| 2 |
<module type="PYTHON_MODULE" version="4">
|
| 3 |
<component name="NewModuleRootManager">
|
| 4 |
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="jdk" jdkName="test" jdkType="Python SDK" />
|
| 6 |
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
</component>
|
| 8 |
<component name="PyDocumentationSettings">
|
.idea/misc.xml
CHANGED
|
@@ -3,5 +3,5 @@
|
|
| 3 |
<component name="Black">
|
| 4 |
<option name="sdkName" value="MultiagentPersonalAssistant" />
|
| 5 |
</component>
|
| 6 |
-
<component name="ProjectRootManager" version="2" project-jdk-name="
|
| 7 |
</project>
|
|
|
|
| 3 |
<component name="Black">
|
| 4 |
<option name="sdkName" value="MultiagentPersonalAssistant" />
|
| 5 |
</component>
|
| 6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="test" project-jdk-type="Python SDK" />
|
| 7 |
</project>
|
agent/__pycache__/file_preprocessing.cpython-312.pyc
CHANGED
|
Binary files a/agent/__pycache__/file_preprocessing.cpython-312.pyc and b/agent/__pycache__/file_preprocessing.cpython-312.pyc differ
|
|
|
agent/__pycache__/models.cpython-312.pyc
CHANGED
|
Binary files a/agent/__pycache__/models.cpython-312.pyc and b/agent/__pycache__/models.cpython-312.pyc differ
|
|
|
agent/file_preprocessing.py
CHANGED
|
@@ -1,36 +1,25 @@
|
|
| 1 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 2 |
from speechbrain.inference.classifiers import EncoderClassifier
|
|
|
|
|
|
|
| 3 |
import speech_recognition as sr
|
| 4 |
from pydub import AudioSegment
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from PyPDF2 import PdfReader
|
| 7 |
from docx import Document
|
| 8 |
-
from PIL import Image
|
| 9 |
import torchaudio
|
| 10 |
import mimetypes
|
| 11 |
import asyncio
|
| 12 |
-
import torch
|
| 13 |
-
import io
|
| 14 |
import os
|
| 15 |
|
| 16 |
load_dotenv()
|
| 17 |
-
MID = "apple/FastVLM-1.5B"
|
| 18 |
-
IMAGE_TOKEN_INDEX = -200
|
| 19 |
-
|
| 20 |
-
tok = AutoTokenizer.from_pretrained(MID, trust_remote_code=True)
|
| 21 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 22 |
-
MID,
|
| 23 |
-
dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 24 |
-
device_map="auto",
|
| 25 |
-
trust_remote_code=True,
|
| 26 |
-
)
|
| 27 |
language_id = EncoderClassifier.from_hparams(source="speechbrain/lang-id-voxlingua107-ecapa", savedir="tmp")
|
| 28 |
|
| 29 |
|
| 30 |
async def preprocess_file(file_name: str):
|
| 31 |
mime_type = mimetypes.guess_type(file_name)[0]
|
| 32 |
if "image" in mime_type:
|
| 33 |
-
return await
|
| 34 |
elif "video" in mime_type:
|
| 35 |
prompt = "Give a detailed description of the video."
|
| 36 |
elif "audio" in mime_type:
|
|
@@ -79,40 +68,21 @@ def preprocess_audio(file_name: str):
|
|
| 79 |
return text
|
| 80 |
|
| 81 |
|
| 82 |
-
def preprocess_image(file_name: str)
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
)
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
# Tokenize text around the image placeholder
|
| 93 |
-
pre_ids = tok(pre, return_tensors="pt", add_special_tokens=False).input_ids
|
| 94 |
-
post_ids = tok(post, return_tensors="pt", add_special_tokens=False).input_ids
|
| 95 |
-
|
| 96 |
-
# Insert the image token id (-200)
|
| 97 |
-
img_tok = torch.tensor([[IMAGE_TOKEN_INDEX]], dtype=pre_ids.dtype)
|
| 98 |
-
input_ids = torch.cat([pre_ids, img_tok, post_ids], dim=1).to(model.device)
|
| 99 |
-
attention_mask = torch.ones_like(input_ids, device=model.device)
|
| 100 |
-
|
| 101 |
-
# Preprocess the image
|
| 102 |
-
img = Image.open(file_name).convert("RGB")
|
| 103 |
-
px = model.get_vision_tower().image_processor(images=img, return_tensors="pt")["pixel_values"]
|
| 104 |
-
px = px.to(model.device, dtype=model.dtype)
|
| 105 |
-
|
| 106 |
-
# Generate response
|
| 107 |
-
with torch.no_grad():
|
| 108 |
-
out = model.generate(
|
| 109 |
-
inputs=input_ids,
|
| 110 |
-
attention_mask=attention_mask,
|
| 111 |
-
images=px,
|
| 112 |
-
max_new_tokens=128,
|
| 113 |
-
)
|
| 114 |
-
|
| 115 |
-
return tok.decode(out[0], skip_special_tokens=True)
|
| 116 |
|
| 117 |
|
| 118 |
def preprocess_text(file_name, mime_type: str) -> str:
|
|
|
|
|
|
|
| 1 |
from speechbrain.inference.classifiers import EncoderClassifier
|
| 2 |
+
from langchain_core.messages import HumanMessage
|
| 3 |
+
from agent.models import llm_image
|
| 4 |
import speech_recognition as sr
|
| 5 |
from pydub import AudioSegment
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
from PyPDF2 import PdfReader
|
| 8 |
from docx import Document
|
|
|
|
| 9 |
import torchaudio
|
| 10 |
import mimetypes
|
| 11 |
import asyncio
|
| 12 |
+
import base64
|
|
|
|
| 13 |
import os
|
| 14 |
|
| 15 |
load_dotenv()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
language_id = EncoderClassifier.from_hparams(source="speechbrain/lang-id-voxlingua107-ecapa", savedir="tmp")
|
| 17 |
|
| 18 |
|
| 19 |
async def preprocess_file(file_name: str):
|
| 20 |
mime_type = mimetypes.guess_type(file_name)[0]
|
| 21 |
if "image" in mime_type:
|
| 22 |
+
return await preprocess_image(file_name)
|
| 23 |
elif "video" in mime_type:
|
| 24 |
prompt = "Give a detailed description of the video."
|
| 25 |
elif "audio" in mime_type:
|
|
|
|
| 68 |
return text
|
| 69 |
|
| 70 |
|
| 71 |
+
async def preprocess_image(file_name: str):
|
| 72 |
+
with open(file_name, "rb") as f:
|
| 73 |
+
img_b64 = base64.b64encode(f.read()).decode("utf-8")
|
| 74 |
+
response = await llm_image.ainvoke([HumanMessage(
|
| 75 |
+
content=[
|
| 76 |
+
{"type": "text", "text": "Please analyze this image and give detailed description."},
|
| 77 |
+
{
|
| 78 |
+
"type": "image_url",
|
| 79 |
+
"image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}
|
| 80 |
+
},
|
| 81 |
+
]
|
| 82 |
+
)
|
| 83 |
+
]
|
| 84 |
)
|
| 85 |
+
return response.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
def preprocess_text(file_name, mime_type: str) -> str:
|
agent/models.py
CHANGED
|
@@ -13,4 +13,8 @@ llm_peripheral = init_chat_model(
|
|
| 13 |
|
| 14 |
llm_agents = init_chat_model(
|
| 15 |
model="groq:qwen/qwen3-32b"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
)
|
|
|
|
| 13 |
|
| 14 |
llm_agents = init_chat_model(
|
| 15 |
model="groq:qwen/qwen3-32b"
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
llm_image = init_chat_model(
|
| 19 |
+
model="groq:meta-llama/llama-4-scout-17b-16e-instruct"
|
| 20 |
)
|
app.py
CHANGED
|
@@ -29,14 +29,17 @@ async def file_mode(
|
|
| 29 |
data = await file.read()
|
| 30 |
dest.write_bytes(data)
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
# Preprocess file
|
| 33 |
transcription = await preprocess_file(str(dest))
|
| 34 |
state_data["message"] = HumanMessage(
|
| 35 |
content=transcription
|
| 36 |
)
|
| 37 |
# Call the agents
|
| 38 |
-
assistant =
|
| 39 |
-
await assistant.authorization()
|
| 40 |
response = await assistant.run()
|
| 41 |
os.remove(str(dest))
|
| 42 |
return response
|
|
@@ -57,6 +60,10 @@ async def file_mode(
|
|
| 57 |
data = await file.read()
|
| 58 |
dest.write_bytes(data)
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
# Preprocess file
|
| 61 |
file_contents = await preprocess_file(str(dest))
|
| 62 |
state_data["message"] = HumanMessage(
|
|
@@ -64,8 +71,7 @@ async def file_mode(
|
|
| 64 |
)
|
| 65 |
|
| 66 |
# Call the agents
|
| 67 |
-
assistant =
|
| 68 |
-
await assistant.authorization()
|
| 69 |
response = await assistant.run()
|
| 70 |
os.remove(str(dest))
|
| 71 |
return response
|
|
@@ -86,6 +92,10 @@ async def file_mode(
|
|
| 86 |
data = await file.read()
|
| 87 |
dest.write_bytes(data)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
# Preprocess file
|
| 90 |
file_contents = await preprocess_file(str(dest))
|
| 91 |
state_data["message"] = HumanMessage(
|
|
@@ -93,8 +103,7 @@ async def file_mode(
|
|
| 93 |
)
|
| 94 |
|
| 95 |
# Call the agents
|
| 96 |
-
assistant =
|
| 97 |
-
await assistant.authorization()
|
| 98 |
response = await assistant.run()
|
| 99 |
os.remove(str(dest))
|
| 100 |
return response
|
|
|
|
| 29 |
data = await file.read()
|
| 30 |
dest.write_bytes(data)
|
| 31 |
|
| 32 |
+
# Initialize agent
|
| 33 |
+
assistant = Assistant(state=state_data)
|
| 34 |
+
await assistant.authorization()
|
| 35 |
+
|
| 36 |
# Preprocess file
|
| 37 |
transcription = await preprocess_file(str(dest))
|
| 38 |
state_data["message"] = HumanMessage(
|
| 39 |
content=transcription
|
| 40 |
)
|
| 41 |
# Call the agents
|
| 42 |
+
assistant.state = state_data
|
|
|
|
| 43 |
response = await assistant.run()
|
| 44 |
os.remove(str(dest))
|
| 45 |
return response
|
|
|
|
| 60 |
data = await file.read()
|
| 61 |
dest.write_bytes(data)
|
| 62 |
|
| 63 |
+
# Initialize agent
|
| 64 |
+
assistant = Assistant(state=state_data)
|
| 65 |
+
await assistant.authorization()
|
| 66 |
+
|
| 67 |
# Preprocess file
|
| 68 |
file_contents = await preprocess_file(str(dest))
|
| 69 |
state_data["message"] = HumanMessage(
|
|
|
|
| 71 |
)
|
| 72 |
|
| 73 |
# Call the agents
|
| 74 |
+
assistant.state = state_data
|
|
|
|
| 75 |
response = await assistant.run()
|
| 76 |
os.remove(str(dest))
|
| 77 |
return response
|
|
|
|
| 92 |
data = await file.read()
|
| 93 |
dest.write_bytes(data)
|
| 94 |
|
| 95 |
+
#Initialize agent
|
| 96 |
+
assistant = Assistant(state=state_data)
|
| 97 |
+
await assistant.authorization()
|
| 98 |
+
|
| 99 |
# Preprocess file
|
| 100 |
file_contents = await preprocess_file(str(dest))
|
| 101 |
state_data["message"] = HumanMessage(
|
|
|
|
| 103 |
)
|
| 104 |
|
| 105 |
# Call the agents
|
| 106 |
+
assistant.state = state_data
|
|
|
|
| 107 |
response = await assistant.run()
|
| 108 |
os.remove(str(dest))
|
| 109 |
return response
|