set up app.py with fully integrated text to ASL video flow
Browse files- .gitignore +2 -0
- .gradio/certificate.pem +31 -0
- __pycache__/asl_gloss.cpython-311.pyc +0 -0
- __pycache__/asl_glosser.cpython-311.pyc +0 -0
- __pycache__/document_parser.cpython-311.pyc +0 -0
- __pycache__/document_parsing.cpython-311.pyc +0 -0
- __pycache__/document_parsing.cpython-313.pyc +0 -0
- __pycache__/document_to_gloss.cpython-311.pyc +0 -0
- __pycache__/document_to_gloss.cpython-313.pyc +0 -0
- __pycache__/vectorizer.cpython-311.pyc +0 -0
- __pycache__/video_gen.cpython-311.pyc +0 -0
- __pycache__/word2vec.cpython-311.pyc +0 -0
- app.py +173 -4
- asl_gloss.py +323 -0
- document_parsing.py +263 -0
- document_to_gloss.py +280 -0
- requirements.txt +16 -0
- test.py +85 -0
- vectorizer.py +155 -0
- video_gen.py +562 -0
- videos_rows.csv +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.DS_Store
|
| 2 |
+
.env
|
.gradio/certificate.pem
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-----BEGIN CERTIFICATE-----
|
| 2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
| 3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
| 4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
| 5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
| 6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
| 7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
| 8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
| 9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
| 10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
| 11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
| 12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
| 13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
| 14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
| 15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
| 16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
| 17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
| 18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
| 19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
| 20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
| 21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
| 22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
| 23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
| 24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
| 25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
| 26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
| 27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
| 28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
| 29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
| 30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
| 31 |
+
-----END CERTIFICATE-----
|
__pycache__/asl_gloss.cpython-311.pyc
ADDED
|
Binary file (14.3 kB). View file
|
|
|
__pycache__/asl_glosser.cpython-311.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
__pycache__/document_parser.cpython-311.pyc
ADDED
|
Binary file (12.4 kB). View file
|
|
|
__pycache__/document_parsing.cpython-311.pyc
ADDED
|
Binary file (12.4 kB). View file
|
|
|
__pycache__/document_parsing.cpython-313.pyc
ADDED
|
Binary file (10.6 kB). View file
|
|
|
__pycache__/document_to_gloss.cpython-311.pyc
ADDED
|
Binary file (12.1 kB). View file
|
|
|
__pycache__/document_to_gloss.cpython-313.pyc
ADDED
|
Binary file (10.6 kB). View file
|
|
|
__pycache__/vectorizer.cpython-311.pyc
ADDED
|
Binary file (7.07 kB). View file
|
|
|
__pycache__/video_gen.cpython-311.pyc
ADDED
|
Binary file (29.8 kB). View file
|
|
|
__pycache__/word2vec.cpython-311.pyc
ADDED
|
Binary file (7.56 kB). View file
|
|
|
app.py
CHANGED
|
@@ -1,7 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from document_to_gloss import DocumentToASLConverter
from vectorizer import Vectorizer
from video_gen import create_multi_stitched_video
import gradio as gr
import asyncio
import re
import boto3
import os
from botocore.config import Config
from dotenv import load_dotenv
import requests
import tempfile

# Load environment variables from .env file
load_dotenv()

# Load R2/S3 environment secrets
R2_ENDPOINT = os.environ.get("R2_ENDPOINT")
R2_ACCESS_KEY_ID = os.environ.get("R2_ACCESS_KEY_ID")
R2_SECRET_ACCESS_KEY = os.environ.get("R2_SECRET_ACCESS_KEY")

# Validate that required environment variables are set
# (fail fast at import time rather than on the first user upload)
if not all([R2_ENDPOINT, R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY]):
    raise ValueError("Missing required R2 environment variables. Please check your .env file.")

# Gradio UI metadata and widgets
title = "AI-SL"
description = "Convert text to ASL!"
article = ("<p style='text-align: center'><a href='https://github.com/deenasun' "
           "target='_blank'>Deena Sun on Github</a></p>")
inputs = gr.File(label="Upload Document (pdf, txt, docx, or epub)")
outputs = [
    gr.JSON(label="Processing Results"),
    gr.Video(label="ASL Video Output")
]

# Shared, module-level pipeline components (created once per process)
asl_converter = DocumentToASLConverter()
vectorizer = Vectorizer()
session = boto3.session.Session()

# Cloudflare R2 is S3-compatible; it requires SigV4 request signing.
# NOTE(review): this s3 client is constructed but never used in this chunk —
# downloads appear to go through public URLs via requests; confirm it is needed.
s3 = session.client(
    service_name='s3',
    region_name='auto',
    endpoint_url=R2_ENDPOINT,
    aws_access_key_id=R2_ACCESS_KEY_ID,
    aws_secret_access_key=R2_SECRET_ACCESS_KEY,
    config=Config(signature_version='s3v4')
)
|
| 48 |
+
|
| 49 |
+
def clean_gloss_token(token):
    """Normalize a raw gloss token.

    Drops square brackets and embedded line breaks, collapses whitespace
    runs to single spaces, trims the ends, and lowercases the result.
    Returns '' for tokens that were nothing but brackets/whitespace.
    """
    # Strip the [ ] wrappers and any embedded CR/LF characters first.
    no_brackets = re.sub(r'[\[\]\n\r]', '', token)
    # Collapse internal whitespace runs and trim the ends.
    normalized = re.sub(r'\s+', ' ', no_brackets).strip()
    return normalized.lower()
|
| 59 |
+
|
| 60 |
+
async def parse_vectorize_and_search(file):
    """Full pipeline: document -> ASL gloss -> per-token video lookup -> stitched video.

    Args:
        file: The uploaded document, as delivered by the gr.File input.

    Returns:
        A (summary, video_path) tuple. `summary` is a dict with the raw gloss,
        matched video URLs, their count, and the cleaned tokens; `video_path`
        is the local path of the stitched output, or None if no video could
        be produced.
    """
    print(file)
    # Convert the whole document into a bracketed ASL gloss string.
    gloss = asl_converter.convert_document(file)
    print("ASL", gloss)

    # Split by spaces and clean each token
    gloss_tokens = gloss.split()
    cleaned_tokens = []

    for token in gloss_tokens:
        cleaned = clean_gloss_token(token)
        if cleaned:  # Only add non-empty tokens
            cleaned_tokens.append(cleaned)

    print("Cleaned tokens:", cleaned_tokens)

    videos = []
    video_files = []  # Store local file paths for stitching

    for g in cleaned_tokens:
        print(f"Processing {g}")
        try:
            # Vector-search the sign-video index for the closest match.
            result = await vectorizer.vector_query_from_supabase(query=g)
            print("result", result)
            if result.get("match", False):
                # NOTE(review): assumes "video_url" is always present whenever
                # "match" is truthy — confirm against Vectorizer's contract.
                video_url = result["video_url"]
                videos.append(video_url)

                # Download the video
                local_path = download_video_from_url(video_url)
                if local_path:
                    video_files.append(local_path)

        except Exception as e:
            # Best-effort: one failed token must not abort the whole pipeline.
            print(f"Error processing {g}: {e}")
            continue

    # Create stitched video if we have multiple videos
    stitched_video_path = None
    if len(video_files) > 1:
        try:
            print(f"Creating stitched video from {len(video_files)} videos...")
            stitched_video_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
            create_multi_stitched_video(video_files, stitched_video_path)
            print(f"Stitched video created: {stitched_video_path}")
        except Exception as e:
            print(f"Error creating stitched video: {e}")
            stitched_video_path = None
    elif len(video_files) == 1:
        # If only one video, just use it directly
        stitched_video_path = video_files[0]

    # Clean up individual video files after stitching
    for video_file in video_files:
        if video_file != stitched_video_path:  # Don't delete the final output
            cleanup_temp_video(video_file)

    return {
        "status": "success",
        "videos": videos,
        "video_count": len(videos),
        "gloss": gloss,
        "cleaned_tokens": cleaned_tokens
    }, stitched_video_path
|
| 124 |
+
|
| 125 |
+
# Create a synchronous wrapper for Gradio
def parse_vectorize_and_search_sync(file):
    """Blocking facade over the async pipeline (Gradio's fn is called synchronously here)."""
    # asyncio.run spins up a fresh event loop per request and tears it down.
    return asyncio.run(parse_vectorize_and_search(file))
|
| 128 |
+
|
| 129 |
+
def download_video_from_url(video_url):
    """
    Download a video from a public R2 URL.

    Args:
        video_url: Publicly reachable URL of an .mp4 resource.

    Returns:
        The local file path where the video is saved, or None on failure.
    """
    temp_path = None
    try:
        # Create a temporary file with .mp4 extension; delete=False keeps it
        # on disk after close so the stitching step can read it by path.
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
        temp_path = temp_file.name
        temp_file.close()

        # Download the video. The timeout prevents a stalled connection from
        # hanging the whole Gradio request indefinitely (requests has none
        # by default).
        print(f"Downloading video from: {video_url}")
        response = requests.get(video_url, stream=True, timeout=30)
        response.raise_for_status()

        # Stream to the temporary file in chunks to bound memory use.
        with open(temp_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Video downloaded to: {temp_path}")
        return temp_path

    except Exception as e:
        print(f"Error downloading video: {e}")
        # Don't leak a partial temp file when the download fails midway.
        if temp_path and os.path.exists(temp_path):
            os.unlink(temp_path)
        return None
|
| 156 |
+
|
| 157 |
+
def cleanup_temp_video(file_path):
    """Best-effort removal of a temporary video file.

    Silently skips falsy or already-missing paths; any OS error is printed
    rather than raised so cleanup never breaks the caller.
    """
    try:
        exists = bool(file_path) and os.path.exists(file_path)
        if exists:
            os.unlink(file_path)
            print(f"Cleaned up: {file_path}")
    except Exception as e:
        print(f"Error cleaning up file: {e}")
|
| 167 |
+
|
| 168 |
+
# Wire up the Gradio interface: one uploaded file in,
# JSON summary + stitched ASL video out.
intf = gr.Interface(
    fn=parse_vectorize_and_search_sync,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article
)
# share=True additionally publishes a temporary public *.gradio.live URL.
intf.launch(share=True)
|
asl_gloss.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
ASL Gloss Converter using Claude API
|
| 4 |
+
|
| 5 |
+
This script takes English text input and converts it to ASL gloss format.
|
| 6 |
+
ASL gloss is a written representation of sign language
|
| 7 |
+
that preserves the spatial and grammatical structure of ASL.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import sys
|
| 12 |
+
import argparse
|
| 13 |
+
import json
|
| 14 |
+
from typing import Optional, Dict, Any
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
try:
|
| 18 |
+
import anthropic
|
| 19 |
+
except ImportError:
|
| 20 |
+
print("Error: anthropic package not found. Please install it with:")
|
| 21 |
+
print("pip install anthropic")
|
| 22 |
+
sys.exit(1)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ASLGlossConverter:
    """
    Converts English text to ASL gloss using Claude's API.

    ASL gloss preserves the spatial and grammatical structure of American Sign Language,
    including features like:
    - Topic-comment structure
    - Spatial referencing
    - Non-manual markers (facial expressions, head movements)
    - Classifier predicates
    - Time indicators
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the ASL gloss converter.

        Args:
            api_key: Anthropic API key. If not provided, will look for ANTHROPIC_API_KEY env var.

        Raises:
            ValueError: If no API key is supplied and the env var is unset.
        """
        self.api_key = api_key or os.getenv('ANTHROPIC_API_KEY')
        if not self.api_key:
            raise ValueError(
                "API key not provided. Set ANTHROPIC_API_KEY environment variable "
                "or pass api_key parameter."
            )

        self.client = anthropic.Anthropic(api_key=self.api_key)

        # System prompt that defines ASL gloss conversion rules
        self.system_prompt = """You are an expert in American Sign Language (ASL) and ASL gloss. Your task is to convert English text to ASL gloss format with bracketed phrases.

ASL GLOSS RULES:
1. Use ALL CAPS for all signs
2. Group related words/concepts into bracketed phrases [PHRASE]
3. Use underscores (_) to connect words within a phrase that are signed together
4. Use classifiers (CL:1, CL:3, CL:C, etc.) for spatial relationships
5. Use pronouns: I (first person), YOU (second person), HE/SHE/THEY (third person)
6. Use time indicators: PAST, FUTURE, NOW, ALWAYS, NEVER
7. Use topic-comment structure: TOPIC COMMENT
8. Use rhetorical questions: RHQ
9. Use conditional markers: IF-THEN
10. Use negation: NOT, NONE, CAN'T, DON'T-WANT
11. Use aspect markers: FINISH, CONTINUE, REPEAT
12. Use directional verbs: GIVE-TO, TELL-TO, ASK-TO
13. Use location markers: HERE, THERE, WHERE
14. Use manner adverbs: FAST, SLOW, CAREFUL, HARD

PHRASE GROUPING GUIDELINES:
- Group compound expressions: [GOOD_MORNING], [THANK_YOU], [HOW_ARE_YOU]
- Keep names as single phrases: [JOHN], [NATALIIA], [CHRISTOPHER_ROBIN]
- Group related concepts: [MY_NAME], [YOUR_HOUSE], [LAST_WEEK]
- Keep simple words separate: [I] [LOVE] [YOU]

IMPORTANT: Output ONLY the bracketed ASL phrases. Each phrase should be in ALL CAPS with underscores connecting related words.

EXAMPLES:
- "Good morning, Brian" → [GOOD_MORNING] [BRIAN]
- "My name is Nataliia" → [I] [NAME] [NATALIIA]
- "I love you" → [I] [LOVE] [YOU]
- "What is your name?" → [YOU] [NAME] [WHAT]
- "I don't understand" → [I] [UNDERSTAND] [NOT]
- "Where is the bathroom?" → [BATHROOM] [WHERE]
- "I want to go home" → [I] [WANT] [GO] [HOME]
- "The cat is sleeping" → [CAT] [SLEEP]
- "I finished my homework" → [I] [HOMEWORK] [FINISH]
- "Do you want coffee?" → [YOU] [WANT] [COFFEE]
- "I can't hear you" → [I] [HEAR] [YOU] [CAN'T]
- "The weather is nice today" → [TODAY] [WEATHER] [NICE]
- "Thank you very much" → [THANK_YOU] [VERY_MUCH]
- "How are you doing?" → [HOW_ARE_YOU] [DOING]
- "See you later" → [SEE_YOU_LATER]
- "I work at Google" → [I] [WORK] [GOOGLE]

Convert the given English text to proper ASL gloss format with bracketed phrases, maintaining the meaning and intent while following ASL grammar and structure."""

    def convert_text(self, english_text: str) -> str:
        """
        Convert English text to ASL gloss using Anthropic v1.x messages API.

        Args:
            english_text: The English source text.

        Returns:
            The model's gloss output (bracketed phrases), stripped of
            surrounding whitespace.

        Raises:
            Exception: Wraps any API error with a descriptive message.
        """
        try:
            # Model is pinned so gloss output stays reproducible across runs.
            message = self.client.messages.create(
                model="claude-3-5-sonnet-20240620",
                max_tokens=1000,
                system=self.system_prompt,
                messages=[
                    {"role": "user", "content": f"Convert this English text to ASL gloss:\n\n{english_text}"}
                ]
            )
            # messages API returns a list of content blocks; first is the text.
            return message.content[0].text.strip()
        except Exception as e:
            raise Exception(f"Error converting text to ASL gloss: {str(e)}")

    def convert_file(self, input_file: str, output_file: Optional[str] = None) -> str:
        """
        Convert text from a file to ASL gloss.

        Args:
            input_file: Path to input text file
            output_file: Path to output file (optional)

        Returns:
            The ASL gloss text

        Raises:
            Exception: On a missing/empty input file or any conversion error.
        """
        try:
            # Read input file
            with open(input_file, 'r', encoding='utf-8') as f:
                english_text = f.read().strip()

            if not english_text:
                raise ValueError("Input file is empty")

            # Convert to ASL gloss
            asl_gloss = self.convert_text(english_text)

            # Write to output file if specified
            if output_file:
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"ASL gloss saved to: {output_file}")

            return asl_gloss

        except FileNotFoundError:
            raise Exception(f"Input file not found: {input_file}")
        except Exception as e:
            # NOTE(review): this re-wraps every error (including the ValueError
            # above) in a plain Exception — callers cannot catch specific types.
            raise Exception(f"Error processing file: {str(e)}")

    def batch_convert(self, input_files: list, output_dir: Optional[str] = None) -> Dict[str, str]:
        """
        Convert multiple files to ASL gloss.

        Args:
            input_files: List of input file paths
            output_dir: Directory to save output files (optional)

        Returns:
            Dictionary mapping input files to their ASL gloss
            (failed files map to an "ERROR: ..." string instead of raising).
        """
        results = {}

        for input_file in input_files:
            try:
                print(f"Converting: {input_file}")

                if output_dir:
                    # Create output filename
                    input_path = Path(input_file)
                    output_filename = f"{input_path.stem}_asl_gloss{input_path.suffix}"
                    output_file = Path(output_dir) / output_filename
                else:
                    output_file = None

                asl_gloss = self.convert_file(input_file, str(output_file) if output_file else None)
                results[input_file] = asl_gloss

                print(f"✓ Completed: {input_file}")

            except Exception as e:
                # One bad file must not abort the whole batch.
                print(f"✗ Error processing {input_file}: {str(e)}")
                results[input_file] = f"ERROR: {str(e)}"

        return results
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def main():
    """Command-line entry point for the ASL gloss converter.

    Dispatches between interactive, batch, single-file, and direct-text modes
    based on the parsed arguments.

    Returns:
        Process exit code: 0 on success, 1 on error or when no mode was given.
    """
    parser = argparse.ArgumentParser(
        description="Convert English text to ASL gloss using Claude's API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert text directly
  python asl_gloss.py "Hello, how are you?"

  # Convert from file
  python asl_gloss.py -f input.txt

  # Convert from file with output
  python asl_gloss.py -f input.txt -o output.txt

  # Batch convert multiple files
  python asl_gloss.py -b file1.txt file2.txt -d output_dir/

  # Interactive mode
  python asl_gloss.py -i
"""
    )

    parser.add_argument(
        'text',
        nargs='?',
        help='English text to convert to ASL gloss'
    )

    parser.add_argument(
        '-f', '--file',
        help='Input file containing English text'
    )

    parser.add_argument(
        '-o', '--output',
        help='Output file for ASL gloss'
    )

    parser.add_argument(
        '-b', '--batch',
        nargs='+',
        help='Batch convert multiple files'
    )

    parser.add_argument(
        '-d', '--output-dir',
        help='Output directory for batch conversion'
    )

    parser.add_argument(
        '-i', '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )

    parser.add_argument(
        '--api-key',
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )

    args = parser.parse_args()

    try:
        # Initialize converter (raises ValueError when no API key is available)
        converter = ASLGlossConverter(api_key=args.api_key)

        if args.interactive:
            print("ASL Gloss Converter - Interactive Mode")
            print("Enter English text to convert to ASL gloss (or 'quit' to exit):")
            print("-" * 50)

            while True:
                try:
                    text = input("\nEnglish text: ").strip()
                    if text.lower() in ['quit', 'exit', 'q']:
                        break

                    if not text:
                        continue

                    print("Converting...")
                    asl_gloss = converter.convert_text(text)
                    print(f"ASL Gloss: {asl_gloss}")

                except KeyboardInterrupt:
                    print("\nExiting...")
                    break
                except Exception as e:
                    # Keep the REPL alive on per-input failures.
                    print(f"Error: {str(e)}")

        elif args.batch:
            # NOTE: a previous inner `if not args.batch` guard was unreachable
            # here (this branch only runs when args.batch is truthy) — removed.
            print(f"Batch converting {len(args.batch)} files...")
            results = converter.batch_convert(args.batch, args.output_dir)

            print("\nResults:")
            for input_file, result in results.items():
                print(f"\n{input_file}:")
                print(result)

        elif args.file:
            # convert_file handles writing to args.output itself.
            asl_gloss = converter.convert_file(args.file, args.output)
            if not args.output:
                print("ASL Gloss:")
                print(asl_gloss)

        elif args.text:
            asl_gloss = converter.convert_text(args.text)
            print("ASL Gloss:")
            print(asl_gloss)

            if args.output:
                with open(args.output, 'w', encoding='utf-8') as f:
                    f.write(asl_gloss)
                print(f"\nSaved to: {args.output}")

        else:
            # No mode selected: show usage and signal failure.
            parser.print_help()
            return 1

        return 0

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
# Script entry point — propagate main()'s status code to the shell.
if __name__ == "__main__":
    sys.exit(main())
|
document_parsing.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Optional, Union
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
# Import document parsing libraries
|
| 8 |
+
try:
|
| 9 |
+
import PyPDF2
|
| 10 |
+
from docx import Document
|
| 11 |
+
import ebooklib
|
| 12 |
+
from ebooklib import epub
|
| 13 |
+
from bs4 import BeautifulSoup
|
| 14 |
+
except ImportError as e:
|
| 15 |
+
print(f"Missing required dependency: {e}")
|
| 16 |
+
print("Please install dependencies with: pip install -r requirements.txt")
|
| 17 |
+
sys.exit(1)
|
| 18 |
+
|
| 19 |
+
# Configure logging
|
| 20 |
+
logging.basicConfig(level=logging.INFO)
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class DocumentParser:
|
| 25 |
+
"""
|
| 26 |
+
A class to parse and extract text from various document formats.
|
| 27 |
+
Supports PDF, TXT, DOC, DOCX, and EPUB files.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
    def __init__(self):
        # Dispatch table: MIME type -> bound parser method. extract_text()
        # looks the detected type up here to pick the right parser.
        # NOTE(review): self._parse_epub is referenced but its definition is
        # not visible in this chunk — confirm it exists.
        self.supported_formats = {
            'application/pdf': self._parse_pdf,
            'text/plain': self._parse_txt,
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document': self._parse_docx,
            'application/msword': self._parse_doc,
            'application/epub+zip': self._parse_epub
        }
|
| 38 |
+
|
| 39 |
+
    def get_file_type(self, file_path: Union[str, Path]) -> str:
        """
        Detect the MIME type of a file using file extension.

        Args:
            file_path: Path to the file

        Returns:
            MIME type string ('unknown' for unrecognized extensions)
        """
        # Extension-based only — no content sniffing is attempted.
        return self._get_mime_from_extension(file_path)
|
| 50 |
+
|
| 51 |
+
def _get_mime_from_extension(self, file_path: Union[str, Path]) -> str:
|
| 52 |
+
"""
|
| 53 |
+
Determine MIME type from file extension.
|
| 54 |
+
|
| 55 |
+
Args:
|
| 56 |
+
file_path: Path to the file
|
| 57 |
+
|
| 58 |
+
Returns:
|
| 59 |
+
MIME type string
|
| 60 |
+
"""
|
| 61 |
+
extension = Path(file_path).suffix.lower()
|
| 62 |
+
extension_map = {
|
| 63 |
+
'.pdf': 'application/pdf',
|
| 64 |
+
'.txt': 'text/plain',
|
| 65 |
+
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
| 66 |
+
'.doc': 'application/msword',
|
| 67 |
+
'.epub': 'application/epub+zip'
|
| 68 |
+
}
|
| 69 |
+
return extension_map.get(extension, 'unknown')
|
| 70 |
+
|
| 71 |
+
    def extract_text(self, file_path: Union[str, Path]) -> Optional[str]:
        """
        Extract text from a document file.

        Dispatches to the format-specific parser registered in
        self.supported_formats based on the file's extension.

        Args:
            file_path: Path to the document file

        Returns:
            Extracted text as string, or None if extraction fails
            (missing file, unsupported type, or parser error — all are
            logged rather than raised).
        """
        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return None

        try:
            mime_type = self.get_file_type(file_path)
            logger.info(f"Detected file type: {mime_type}")

            if mime_type in self.supported_formats:
                # Parsers may raise; the except below converts that to None.
                return self.supported_formats[mime_type](file_path)
            else:
                logger.error(f"Unsupported file type: {mime_type}")
                return None

        except Exception as e:
            logger.error(f"Error extracting text from {file_path}: {e}")
            return None
|
| 100 |
+
|
| 101 |
+
def _parse_pdf(self, file_path: Path) -> str:
|
| 102 |
+
"""
|
| 103 |
+
Extract text from PDF file.
|
| 104 |
+
|
| 105 |
+
Args:
|
| 106 |
+
file_path: Path to PDF file
|
| 107 |
+
|
| 108 |
+
Returns:
|
| 109 |
+
Extracted text
|
| 110 |
+
"""
|
| 111 |
+
text = ""
|
| 112 |
+
try:
|
| 113 |
+
with open(file_path, 'rb') as file:
|
| 114 |
+
pdf_reader = PyPDF2.PdfReader(file)
|
| 115 |
+
|
| 116 |
+
for page_num in range(len(pdf_reader.pages)):
|
| 117 |
+
page = pdf_reader.pages[page_num]
|
| 118 |
+
page_text = page.extract_text()
|
| 119 |
+
if page_text:
|
| 120 |
+
text += page_text + "\n"
|
| 121 |
+
|
| 122 |
+
except Exception as e:
|
| 123 |
+
logger.error(f"Error parsing PDF {file_path}: {e}")
|
| 124 |
+
raise
|
| 125 |
+
|
| 126 |
+
return text.strip()
|
| 127 |
+
|
| 128 |
+
def _parse_txt(self, file_path: Path) -> str:
    """Extract text from a plain text file.

    Tries UTF-8 first; on a decode error falls back to Latin-1, which
    can decode any byte sequence, so near-binary text still yields a
    result. The original nested try/except duplicated the error
    logging; a single outer handler now covers both read attempts.

    Args:
        file_path: Path to the text file.

    Returns:
        The file contents as a string.

    Raises:
        Exception: Re-raised after logging when the file cannot be read.
    """
    try:
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except UnicodeDecodeError:
            # UTF-8 failed; Latin-1 maps every byte to a code point.
            with open(file_path, 'r', encoding='latin-1') as file:
                return file.read()
    except Exception as e:
        logger.error(f"Error reading text file {file_path}: {e}")
        raise
|
| 152 |
+
|
| 153 |
+
def _parse_docx(self, file_path: Path) -> str:
    """Extract text from a DOCX file.

    Args:
        file_path: Path to the DOCX file.

    Returns:
        Paragraph text joined by newlines, stripped.
    """
    try:
        document = Document(file_path)
        lines = [paragraph.text for paragraph in document.paragraphs]
        return "\n".join(lines).strip()
    except Exception as e:
        logger.error(f"Error parsing DOCX {file_path}: {e}")
        raise
|
| 175 |
+
|
| 176 |
+
def _parse_doc(self, file_path: Path) -> str:
    """Extract text from a DOC file (legacy Word format).

    Shells out to ``antiword`` and falls back to ``catdoc``; at least
    one must be installed. Each tool is now attempted independently:
    previously, a missing ``antiword`` binary raised FileNotFoundError
    on the first subprocess call and skipped the ``catdoc`` fallback
    even when catdoc was installed.

    Args:
        file_path: Path to the DOC file.

    Returns:
        The extracted text, stripped.

    Raises:
        Exception: If neither tool is installed or both fail.
    """
    import subprocess

    try:
        for tool in ('antiword', 'catdoc'):
            try:
                result = subprocess.run([tool, str(file_path)],
                                        capture_output=True, text=True)
            except FileNotFoundError:
                # This tool is not installed; try the next one.
                continue
            if result.returncode == 0:
                return result.stdout.strip()

        raise Exception("antiword or catdoc not found. Please install one of them for DOC file support.")
    except Exception as e:
        logger.error(f"Error parsing DOC {file_path}: {e}")
        raise
|
| 208 |
+
|
| 209 |
+
def _parse_epub(self, file_path: Path) -> str:
    """Extract text from an EPUB file.

    Args:
        file_path: Path to the EPUB file.

    Returns:
        The visible text of every document item, stripped.
    """
    try:
        book = epub.read_epub(file_path)
        chunks = []
        for item in book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue
            # Each document item is XHTML; strip the markup.
            markup = item.get_content().decode('utf-8')
            chunks.append(BeautifulSoup(markup, 'html.parser').get_text())
        return "\n".join(chunks).strip()
    except Exception as e:
        logger.error(f"Error parsing EPUB {file_path}: {e}")
        raise
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def main():
    """CLI entry point: extract text from one document and print it."""
    if len(sys.argv) != 2:
        print("Usage: python document_parsing.py <file_path>")
        print("Supported formats: PDF, TXT, DOC, DOCX, EPUB")
        sys.exit(1)

    file_path = sys.argv[1]
    parser = DocumentParser()

    print(f"Extracting text from: {file_path}")
    print("-" * 50)

    extracted_text = parser.extract_text(file_path)

    # extract_text returns None (or "") on failure.
    if not extracted_text:
        print("Failed to extract text from the file.")
        sys.exit(1)

    print("Extracted text:")
    print(extracted_text)
    print(f"\nTotal characters: {len(extracted_text)}")


if __name__ == "__main__":
    main()
|
document_to_gloss.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Document to ASL Gloss Converter
|
| 4 |
+
|
| 5 |
+
This script combines document parsing and ASL glossing to convert
|
| 6 |
+
uploaded documents (PDF, TXT, DOC, DOCX, EPUB) directly to ASL gloss format.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import sys
|
| 11 |
+
import argparse
|
| 12 |
+
from typing import Optional, Dict, Any
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
# Import our existing modules
|
| 16 |
+
from document_parsing import DocumentParser
|
| 17 |
+
from asl_gloss import ASLGlossConverter
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class DocumentToASLConverter:
    """Pipeline that parses documents and renders their text as ASL gloss.

    Wires a DocumentParser (text extraction) to an ASLGlossConverter
    (gloss generation) behind a single convert API.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Set up the parsing and glossing stages.

        Args:
            api_key: Anthropic API key; falls back to the
                ANTHROPIC_API_KEY environment variable when omitted.
        """
        self.document_parser = DocumentParser()
        self.asl_converter = ASLGlossConverter(api_key=api_key)

    def convert_document(self, document_path: str, output_file: Optional[str] = None) -> str:
        """Convert one document file to ASL gloss.

        Args:
            document_path: Path to the document file.
            output_file: Optional path to write the gloss to.

        Returns:
            The ASL gloss text.

        Raises:
            Exception: Wrapping any failure in extraction or conversion.
        """
        try:
            print(f"Processing document: {document_path}")

            # Stage 1: pull raw text out of the document.
            print("Step 1: Extracting text from document...")
            text = self.document_parser.extract_text(document_path)
            if not text:
                raise Exception("Failed to extract text from document")
            print(f"✓ Extracted {len(text)} characters")

            # Stage 2: run the text through the gloss converter.
            print("Step 2: Converting to ASL gloss...")
            gloss = self.asl_converter.convert_text(text)
            print("✓ ASL gloss conversion completed")

            # Stage 3: optionally persist the result.
            if output_file:
                with open(output_file, 'w', encoding='utf-8') as handle:
                    handle.write(gloss)
                print(f"✓ ASL gloss saved to: {output_file}")

            return gloss
        except Exception as e:
            raise Exception(f"Error processing document: {str(e)}")

    def batch_convert_documents(self, document_paths: list, output_dir: Optional[str] = None) -> Dict[str, str]:
        """Convert several documents, collecting per-file results.

        Args:
            document_paths: Document file paths to process.
            output_dir: Optional directory for per-file gloss outputs.

        Returns:
            Mapping from each input path to its gloss, or to an
            "ERROR: ..." string when that file failed.
        """
        results = {}
        for doc_path in document_paths:
            try:
                banner = '=' * 50
                print(f"\n{banner}")
                print(f"Converting: {doc_path}")
                print(f"{banner}")

                target = None
                if output_dir:
                    # Derive <stem>_asl_gloss.txt inside the output dir.
                    stem = Path(doc_path).stem
                    target = Path(output_dir) / f"{stem}_asl_gloss.txt"

                results[doc_path] = self.convert_document(
                    doc_path, str(target) if target else None)
                print(f"✓ Completed: {doc_path}")
            except Exception as e:
                print(f"✗ Error processing {doc_path}: {str(e)}")
                results[doc_path] = f"ERROR: {str(e)}"

        return results

    def get_supported_formats(self) -> list:
        """Return the list of supported document file extensions."""
        return ['.pdf', '.txt', '.docx', '.doc', '.epub']
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _print_gloss(asl_gloss):
    """Print a gloss result with the standard header."""
    print("\nASL Gloss:")
    print("-" * 20)
    print(asl_gloss)


def _run_interactive(converter):
    """Prompt for document paths in a loop, converting each one."""
    print("Document to ASL Gloss Converter - Interactive Mode")
    print("Enter document file paths to convert (or 'quit' to exit):")
    print("-" * 60)

    while True:
        try:
            doc_path = input("\nDocument path: ").strip()
            if doc_path.lower() in ['quit', 'exit', 'q']:
                break
            if not doc_path:
                continue
            if not os.path.exists(doc_path):
                print(f"Error: File not found: {doc_path}")
                continue

            # Optional output file; empty answer means print to stdout.
            output_file = input("Output file (optional, press Enter to skip): ").strip() or None

            print("Converting...")
            asl_gloss = converter.convert_document(doc_path, output_file)
            if not output_file:
                _print_gloss(asl_gloss)

        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            print(f"Error: {str(e)}")


def _run_batch(converter, documents, output_dir):
    """Convert several documents and print a result summary."""
    print(f"Batch converting {len(documents)} documents...")
    results = converter.batch_convert_documents(documents, output_dir)

    print("\n" + "="*60)
    print("BATCH CONVERSION RESULTS")
    print("="*60)
    for doc_path, result in results.items():
        print(f"\nDocument: {doc_path}")
        print("-" * 40)
        if result.startswith("ERROR:"):
            print(f"❌ {result}")
        else:
            print("✅ Conversion successful")
            if not output_dir:
                print("ASL Gloss:")
                # Truncate long glosses in the console summary.
                print(result[:500] + "..." if len(result) > 500 else result)


def main():
    """Command-line entry point.

    Parses arguments and dispatches to the formats listing,
    interactive, batch, or single-document mode.

    Returns:
        Process exit code: 0 on success, 1 on error or missing args.
    """
    parser = argparse.ArgumentParser(
        description="Convert documents to ASL gloss using Claude's API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert a single document
  python document_to_asl.py document.pdf

  # Convert document with output file
  python document_to_asl.py document.pdf -o output.txt

  # Batch convert multiple documents
  python document_to_asl.py -b doc1.pdf doc2.docx doc3.txt -d output_dir/

  # Interactive mode
  python document_to_asl.py -i

  # Show supported formats
  python document_to_asl.py --formats
"""
    )

    parser.add_argument(
        'document',
        nargs='?',
        help='Document file to convert to ASL gloss'
    )
    parser.add_argument(
        '-o', '--output',
        help='Output file for ASL gloss'
    )
    parser.add_argument(
        '-b', '--batch',
        nargs='+',
        help='Batch convert multiple documents'
    )
    parser.add_argument(
        '-d', '--output-dir',
        help='Output directory for batch conversion'
    )
    parser.add_argument(
        '-i', '--interactive',
        action='store_true',
        help='Run in interactive mode'
    )
    parser.add_argument(
        '--formats',
        action='store_true',
        help='Show supported document formats'
    )
    parser.add_argument(
        '--api-key',
        help='Anthropic API key (or set ANTHROPIC_API_KEY env var)'
    )

    args = parser.parse_args()

    try:
        converter = DocumentToASLConverter(api_key=args.api_key)

        if args.formats:
            print("Supported Document Formats:")
            print("=" * 30)
            for fmt in converter.get_supported_formats():
                print(f"  • {fmt}")
            print("\nExamples: .pdf, .txt, .docx, .doc, .epub")
            return 0

        if args.interactive:
            _run_interactive(converter)
        elif args.batch:
            # NOTE: argparse nargs='+' guarantees at least one entry,
            # so the previous re-check for an empty batch was dead code.
            _run_batch(converter, args.batch, args.output_dir)
        elif args.document:
            asl_gloss = converter.convert_document(args.document, args.output)
            if not args.output:
                _print_gloss(asl_gloss)
        else:
            parser.print_help()
            return 1

        return 0

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())
|
requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
anthropic==0.54.0
|
| 2 |
+
beautifulsoup4==4.13.4
|
| 3 |
+
boto3==1.38.41
|
| 4 |
+
botocore==1.38.41
|
| 5 |
+
ebooklib==0.19
|
| 6 |
+
gensim==4.3.3
|
| 7 |
+
gradio==5.34.2
|
| 8 |
+
mediapipe==0.10.21
|
| 9 |
+
numpy<2.0
|
| 10 |
+
opencv-python
|
| 11 |
+
pandas<2.0
|
| 12 |
+
PyPDF2==3.0.1
|
| 13 |
+
python-dotenv==1.1.0
|
| 14 |
+
python-docx
|
| 15 |
+
requests==2.32.4
|
| 16 |
+
supabase==2.15.3
|
test.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script to verify the fixes for the ASL gloss processing
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import re
|
| 8 |
+
from vectorizer import Vectorizer
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def clean_gloss_token(token):
    """Normalize a gloss token.

    Strips square brackets and newline characters, collapses runs of
    whitespace to single spaces, trims the ends, and lowercases.
    """
    # Drop the bracket/newline characters in one translation pass.
    stripped = token.translate(str.maketrans('', '', '[]\n\r'))
    # split()/join collapses and trims all whitespace at once.
    return ' '.join(stripped.split()).lower()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def test_gloss_parsing():
    """Exercise tokenization and cleaning of a sample gloss string."""
    # Sample gloss output from the notebook
    sample_gloss = ("ASL [BEAR] [NAME] [OSKI] [LOVE] [HONEY] [BUT] [ALWAYS] "
                    "[GET-STUCK] [TREE]\n\n[ONE_DAY] [HE] [DISCOVER] [LADDER]\n\n"
                    "[PROBLEM] [SOLVE] [FINISH]")

    print("Original gloss:")
    print(sample_gloss)
    print("\n" + "="*50 + "\n")

    # Clean each whitespace-separated token, dropping any that come
    # out empty after cleaning.
    cleaned_tokens = [tok for tok in map(clean_gloss_token, sample_gloss.split()) if tok]

    print("Cleaned tokens:")
    for idx, tok in enumerate(cleaned_tokens, start=1):
        print(f"{idx:2d}. {tok}")

    return cleaned_tokens
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
async def test_vectorizer():
    """Query the vectorizer with a handful of known gloss words."""
    try:
        vectorizer = Vectorizer()

        # Words expected to exist in the embedding vocabulary.
        for word in ("BEAR", "LOVE", "TREE", "HE", "FINISH"):
            print(f"\nTesting word: {word}")
            result = await vectorizer.vector_query_from_supabase(word)
            print(f"Result: {result}")

    except Exception as e:
        print(f"Error testing vectorizer: {e}")
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
async def main():
    """Run the gloss-parsing and vectorizer smoke tests in sequence."""
    print("Testing ASL Gloss Processing Fixes")
    print("=" * 50)

    # Test 1: Gloss parsing
    print("\n1. Testing gloss parsing...")
    tokens = test_gloss_parsing()
    print(f"Total cleaned tokens: {len(tokens)}")

    # Test 2: Vectorizer (if environment is set up)
    print("\n2. Testing vectorizer...")
    await test_vectorizer()

    print("\n" + "=" * 50)
    print("Test completed!")


if __name__ == "__main__":
    asyncio.run(main())
|
vectorizer.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gensim
|
| 2 |
+
import gensim.downloader
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import os
|
| 6 |
+
from supabase import acreate_client, AsyncClient
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
class Vectorizer:
    """Word-embedding helper backed by gensim and Supabase.

    Responsibilities:
    - Generate embeddings of words
    - Query Supabase for the nearest stored word by vector similarity
    - Return matching ASL videos for words
    """

    def __init__(self):
        load_dotenv()
        # Embedding model; None if the gensim download fails.
        self.kv = self.load_kv()
        # Async Supabase client, created lazily on first query.
        self.supabase = None

    def load_kv(self, model_name='word2vec-google-news-300'):
        """Return a gensim KeyedVectors model, or None on failure."""
        try:
            # gensim.downloader.load returns a KeyedVectors instance.
            return gensim.downloader.load(model_name)
        except Exception as e:
            print(f"Unable to load embedding model from gensim: {e}")
            return None

    async def initialize_supabase(self):
        """Create and return an async Supabase client from env vars."""
        url: str = os.environ.get("SUPABASE_URL")
        key: str = os.environ.get("SUPABASE_KEY")
        client: AsyncClient = await acreate_client(url, key)
        return client

    async def ensure_supabase_initialized(self):
        """Ensure Supabase client is initialized"""
        if self.supabase is None:
            self.supabase = await self.initialize_supabase()

    def encode(self, word):
        """Return the embedding vector for word, or None if OOV."""
        print(f"encoding {word}")
        if self.kv is None or word not in self.kv.key_to_index:
            print(f"Error: {word} is not in the KeyedVector's vocabulary")
            return None
        return self.kv[word]

    def encode_and_format(self, word):
        """Encode word and render it as a Supabase-style vector literal
        ("[v1,v2,...]"), or None when the word cannot be encoded.
        """
        vec = self.encode(word)
        if vec is None:
            return None
        return "[" + ",".join(map(str, vec.tolist())) + "]"

    async def vector_query_from_supabase(self, query):
        """Find the stored word (and video) most similar to query.

        Returns a dict with at least a "match" flag; on success it also
        carries the matching word, its video URL, and the similarity.
        """
        try:
            await self.ensure_supabase_initialized()

            embedding = self.encode(query)
            if embedding is None:
                return {
                    "match": False,
                    "error": f"'{query}' not in vocabulary"
                }

            if self.supabase is None:
                return {"match": False, "error": "Supabase not initialized"}

            # Server-side similarity search via the match_vector RPC.
            response = await self.supabase.rpc(
                "match_vector",
                {
                    "query_embedding": embedding.tolist(),
                    "match_threshold": 0.0,
                    "match_count": 1
                }
            ).execute()

            rows = response.data
            if not rows:
                return {"match": False}

            best = rows[0]
            return {
                "match": True,
                "query": query,
                "matching_word": best["word"],
                "video_url": best["video_url"],
                "similarity": best["similarity"]
            }

        except Exception as e:
            print(f"RPC call failed: {e}")
            return {"match": False, "error": str(e)}
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def load_filtered_kv(model_name='word2vec-google-news-300', vocab=None):
    """Return a KeyedVectors model, optionally filtered to vocab.

    Args:
        model_name: Name of the gensim pretrained model to download.
        vocab: Optional iterable of words; when given and non-empty,
            the returned model contains only those words that exist in
            the original model.

    Returns:
        A KeyedVectors instance (the filtered model when vocab was
        supplied), or None if loading fails.
    """
    if vocab is None:
        vocab = []
    try:
        # gensim.downloader.load returns a KeyedVectors
        original_kv = gensim.downloader.load(model_name)
        if not vocab:
            return original_kv

        # Keep only the vocab words the original model knows.
        filtered = {key: original_kv[key]
                    for key in vocab if key in original_kv.key_to_index}

        new_kv = gensim.models.KeyedVectors(
            vector_size=original_kv.vector_size)
        new_kv.add_vectors(list(filtered.keys()),
                           np.array(list(filtered.values())))
        # BUG FIX: previously returned the unfiltered original model,
        # discarding the filtered KeyedVectors that was just built.
        return new_kv
    except Exception as e:
        print(f"Unable to load embedding model from gensim: {e}")
        return None
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
async def main():
    """Manual smoke test: encode a word and run similarity queries."""
    vectorizer = Vectorizer()

    print(vectorizer.encode("test"))
    for word in ("dog", "cat"):
        print(await vectorizer.vector_query_from_supabase(word))

    # read word list
    # df = pd.read_csv('videos_rows.csv')

    # # Add embeddings column - apply encode to each word
    # df['embedding'] = df['word'].apply(vectorizer.encode_and_format)

    # # Drop any rows that don't have an embedding
    # df = df.dropna(subset=['embedding'])
    # print(df.head())

    # df.to_csv("vectors.csv", index=False, columns=["word", "video_url", "embedding"], header=True)


if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
|
video_gen.py
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import mediapipe as mp
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
def extract_keypoints_from_video(video_path, verbose=False):
    """Run MediaPipe pose + hand tracking over every frame of a video.

    Args:
        video_path: Path to the input video file.
        verbose: When True, print each processed frame index.

    Returns:
        List of (pose, left_hand, right_hand) tuples, one per frame
        with a detected pose. pose is a (33, 2) array of normalized
        x/y body keypoints; each hand is a (21, 2) array, zeros when
        that hand was not detected.
    """
    pose_model = mp.solutions.pose.Pose()
    hands_model = mp.solutions.hands.Hands(static_image_mode=False,
                                           max_num_hands=2)

    capture = cv2.VideoCapture(video_path)
    sequence = []
    frame_idx = 0

    while capture.isOpened():
        ok, frame = capture.read()
        if not ok:
            break

        # MediaPipe expects RGB; OpenCV delivers BGR.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Skip frames with no detected body pose.
        pose_results = pose_model.process(rgb)
        if not pose_results.pose_landmarks:
            frame_idx += 1
            continue

        # 33 body keypoints as normalized (x, y) pairs.
        body = np.array([[lm.x, lm.y]
                         for lm in pose_results.pose_landmarks.landmark])

        # Hands default to zeros when not detected.
        left_hand = np.zeros((21, 2))
        right_hand = np.zeros((21, 2))

        hand_results = hands_model.process(rgb)
        if hand_results.multi_hand_landmarks and hand_results.multi_handedness:
            for landmarks, info in zip(hand_results.multi_hand_landmarks,
                                       hand_results.multi_handedness):
                coords = np.array([[lm.x, lm.y]
                                   for lm in landmarks.landmark])
                # classification label is 'Left' or 'Right'.
                if info.classification[0].label == "Left":
                    left_hand = coords
                else:
                    right_hand = coords

        sequence.append((body, left_hand, right_hand))

        if verbose:
            print(f"Processed frame {frame_idx}")
        frame_idx += 1

    capture.release()
    pose_model.close()
    hands_model.close()

    return sequence
|
| 60 |
+
|
| 61 |
+
def render_person(frame, pose, left_hand, right_hand):
    """Draw a stylized cartoon "bear" character onto `frame` from keypoints.

    Args:
        frame: BGR image (numpy array) to draw into; modified in place.
        pose: (33, 2) array of normalized MediaPipe Pose landmarks.
        left_hand: (21, 2) array of normalized left-hand landmarks
            (all zeros means the hand was not detected).
        right_hand: (21, 2) array of normalized right-hand landmarks.

    Returns:
        The same `frame` with the character drawn on it.
    """
    h, w = frame.shape[:2]

    # MediaPipe Pose landmark indices (subset used here).
    # Face
    NOSE = 0
    LEFT_EYE = 2
    RIGHT_EYE = 5
    LEFT_EAR = 7
    RIGHT_EAR = 8

    # Body
    LEFT_SHOULDER = 11
    RIGHT_SHOULDER = 12
    LEFT_ELBOW = 13
    RIGHT_ELBOW = 14
    LEFT_WRIST = 15
    RIGHT_WRIST = 16
    LEFT_HIP = 23
    RIGHT_HIP = 24
    LEFT_KNEE = 25
    RIGHT_KNEE = 26
    LEFT_ANKLE = 27
    RIGHT_ANKLE = 28

    # MediaPipe Hands landmark indices.
    # Thumb: 0-4, Index: 5-8, Middle: 9-12, Ring: 13-16, Pinky: 17-20
    THUMB_TIP = 4
    INDEX_TIP = 8
    MIDDLE_TIP = 12
    RING_TIP = 16
    PINKY_TIP = 20

    # Bone connections within a single hand (wrist = index 0).
    finger_connections = [
        # Thumb
        (0, 1), (1, 2), (2, 3), (3, 4),
        # Index finger
        (0, 5), (5, 6), (6, 7), (7, 8),
        # Middle finger
        (0, 9), (9, 10), (10, 11), (11, 12),
        # Ring finger
        (0, 13), (13, 14), (14, 15), (15, 16),
        # Pinky
        (0, 17), (17, 18), (18, 19), (19, 20)
    ]

    # Color palette. NOTE(review): OpenCV draws with BGR tuples, so e.g.
    # (173, 216, 230) renders as a light tan, while (205, 170, 125) renders
    # light blue. The names below describe the intended on-screen look
    # assuming BGR ordering — confirm against actual output.
    skin_color = (173, 216, 230)      # light tan "bear fur" (BGR)
    outline_color = (40, 40, 40)      # soft dark-gray outline
    shirt_color = (205, 170, 125)     # light blue jacket (BGR)

    pants_color = (135, 206, 235)     # pants (BGR; check rendered hue vs intent)
    bow_tie_color = (255, 255, 255)   # white bow tie
    eye_color = (255, 255, 255)       # white eyeballs
    pupil_color = (0, 0, 0)           # black pupils

    # Body parts are drawn back-to-front as filled shapes.

    # 1. Head, centered on the nose landmark.
    if len(pose) > max(LEFT_EYE, RIGHT_EYE, LEFT_EAR, RIGHT_EAR):
        head_center_x = pose[NOSE][0] * w
        head_center_y = pose[NOSE][1] * h

        # Estimate head size from the inter-eye distance; fall back to a
        # fixed radius when the eye landmarks are zeroed (undetected).
        if pose[LEFT_EYE][0] > 0 and pose[RIGHT_EYE][0] > 0:
            eye_distance = abs(pose[LEFT_EYE][0] - pose[RIGHT_EYE][0]) * w
            head_radius = eye_distance * 1.8  # oversized head for a friendlier look
        else:
            head_radius = 35

        # Bear ears go first so the head circle overlaps their base.
        ear_radius = int(head_radius * 0.4)
        # Left ear
        left_ear_x = int(head_center_x - head_radius * 0.6)
        left_ear_y = int(head_center_y - head_radius * 0.8)
        cv2.circle(frame, (left_ear_x, left_ear_y), ear_radius, skin_color, -1)
        cv2.circle(frame, (left_ear_x, left_ear_y), ear_radius, outline_color, 2)
        # Inner-ear detail
        cv2.circle(frame, (left_ear_x, left_ear_y), int(ear_radius * 0.6), (120, 160, 180), -1)

        # Right ear
        right_ear_x = int(head_center_x + head_radius * 0.6)
        right_ear_y = int(head_center_y - head_radius * 0.8)
        cv2.circle(frame, (right_ear_x, right_ear_y), ear_radius, skin_color, -1)
        cv2.circle(frame, (right_ear_x, right_ear_y), ear_radius, outline_color, 2)
        # Inner-ear detail
        cv2.circle(frame, (right_ear_x, right_ear_y), int(ear_radius * 0.6), (120, 160, 180), -1)

        # Head disc with outline.
        cv2.circle(frame, (int(head_center_x), int(head_center_y)), int(head_radius), skin_color, -1)
        cv2.circle(frame, (int(head_center_x), int(head_center_y)), int(head_radius), outline_color, 2)

        # Eyes: white ball, pupil, outline, then a small highlight.
        if pose[LEFT_EYE][0] > 0 and pose[LEFT_EYE][1] > 0:
            eye_x, eye_y = int(pose[LEFT_EYE][0] * w), int(pose[LEFT_EYE][1] * h)
            # White of the eye
            cv2.circle(frame, (eye_x, eye_y), 10, eye_color, -1)
            # Pupil
            cv2.circle(frame, (eye_x, eye_y), 6, pupil_color, -1)
            # Eye outline
            cv2.circle(frame, (eye_x, eye_y), 10, outline_color, 1)
            # Eye shine
            cv2.circle(frame, (eye_x-3, eye_y-3), 3, (255, 255, 255), -1)

        if pose[RIGHT_EYE][0] > 0 and pose[RIGHT_EYE][1] > 0:
            eye_x, eye_y = int(pose[RIGHT_EYE][0] * w), int(pose[RIGHT_EYE][1] * h)
            # White of the eye
            cv2.circle(frame, (eye_x, eye_y), 10, eye_color, -1)
            # Pupil
            cv2.circle(frame, (eye_x, eye_y), 6, pupil_color, -1)
            # Eye outline
            cv2.circle(frame, (eye_x, eye_y), 10, outline_color, 1)
            # Eye shine
            cv2.circle(frame, (eye_x-3, eye_y-3), 3, (255, 255, 255), -1)

        # Round bear nose just below the head center.
        nose_x = int(head_center_x)
        nose_y = int(head_center_y + head_radius * 0.1)
        cv2.circle(frame, (nose_x, nose_y), 6, (80, 40, 20), -1)  # dark nose (BGR)
        cv2.circle(frame, (nose_x, nose_y), 6, outline_color, 1)

        # Friendly smile: bottom half of an ellipse (0..180 degrees).
        smile_center_x = int(head_center_x)
        smile_center_y = int(head_center_y + head_radius * 0.3)
        smile_radius = int(head_radius * 0.6)
        cv2.ellipse(frame, (smile_center_x, smile_center_y), (smile_radius, smile_radius//2),
                    0, 0, 180, outline_color, 3)

    # 2. Torso: filled quadrilateral between shoulders and hips.
    if len(pose) > max(LEFT_SHOULDER, RIGHT_SHOULDER, LEFT_HIP, RIGHT_HIP):
        left_shoulder = (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h))
        right_shoulder = (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h))
        left_hip = (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h))
        right_hip = (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h))

        torso_points = np.array([left_shoulder, right_shoulder, right_hip, left_hip], np.int32)
        cv2.fillPoly(frame, [torso_points], shirt_color)
        cv2.polylines(frame, [torso_points], True, outline_color, 2)

    # 3. Arms: thick colored segment plus a thin outline per bone.
    # Left arm
    if len(pose) > max(LEFT_SHOULDER, LEFT_ELBOW, LEFT_WRIST):
        if pose[LEFT_SHOULDER][0] > 0 and pose[LEFT_ELBOW][0] > 0:
            # Upper arm (thickness 36 px for a non-stick-figure look)
            cv2.line(frame,
                     (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h)),
                     (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                     skin_color, 36)
            cv2.line(frame,
                     (int(pose[LEFT_SHOULDER][0] * w), int(pose[LEFT_SHOULDER][1] * h)),
                     (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                     outline_color, 2)

            # Forearm (slightly thinner)
            if pose[LEFT_WRIST][0] > 0:
                cv2.line(frame,
                         (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                         (int(pose[LEFT_WRIST][0] * w), int(pose[LEFT_WRIST][1] * h)),
                         skin_color, 30)
                cv2.line(frame,
                         (int(pose[LEFT_ELBOW][0] * w), int(pose[LEFT_ELBOW][1] * h)),
                         (int(pose[LEFT_WRIST][0] * w), int(pose[LEFT_WRIST][1] * h)),
                         outline_color, 2)

    # Right arm (mirror of the left)
    if len(pose) > max(RIGHT_SHOULDER, RIGHT_ELBOW, RIGHT_WRIST):
        if pose[RIGHT_SHOULDER][0] > 0 and pose[RIGHT_ELBOW][0] > 0:
            # Upper arm
            cv2.line(frame,
                     (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h)),
                     (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                     skin_color, 36)
            cv2.line(frame,
                     (int(pose[RIGHT_SHOULDER][0] * w), int(pose[RIGHT_SHOULDER][1] * h)),
                     (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                     outline_color, 2)

            # Forearm
            if pose[RIGHT_WRIST][0] > 0:
                cv2.line(frame,
                         (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                         (int(pose[RIGHT_WRIST][0] * w), int(pose[RIGHT_WRIST][1] * h)),
                         skin_color, 30)
                cv2.line(frame,
                         (int(pose[RIGHT_ELBOW][0] * w), int(pose[RIGHT_ELBOW][1] * h)),
                         (int(pose[RIGHT_WRIST][0] * w), int(pose[RIGHT_WRIST][1] * h)),
                         outline_color, 2)

    # 4. Legs: same two-segment treatment in the pants color.
    # Left leg
    if len(pose) > max(LEFT_HIP, LEFT_KNEE, LEFT_ANKLE):
        if pose[LEFT_HIP][0] > 0 and pose[LEFT_KNEE][0] > 0:
            # Thigh
            cv2.line(frame,
                     (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h)),
                     (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                     pants_color, 14)
            cv2.line(frame,
                     (int(pose[LEFT_HIP][0] * w), int(pose[LEFT_HIP][1] * h)),
                     (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                     outline_color, 2)

            # Shin
            if pose[LEFT_ANKLE][0] > 0:
                cv2.line(frame,
                         (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                         (int(pose[LEFT_ANKLE][0] * w), int(pose[LEFT_ANKLE][1] * h)),
                         pants_color, 12)
                cv2.line(frame,
                         (int(pose[LEFT_KNEE][0] * w), int(pose[LEFT_KNEE][1] * h)),
                         (int(pose[LEFT_ANKLE][0] * w), int(pose[LEFT_ANKLE][1] * h)),
                         outline_color, 2)

    # Right leg
    if len(pose) > max(RIGHT_HIP, RIGHT_KNEE, RIGHT_ANKLE):
        if pose[RIGHT_HIP][0] > 0 and pose[RIGHT_KNEE][0] > 0:
            # Thigh
            cv2.line(frame,
                     (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h)),
                     (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                     pants_color, 14)
            cv2.line(frame,
                     (int(pose[RIGHT_HIP][0] * w), int(pose[RIGHT_HIP][1] * h)),
                     (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                     outline_color, 2)

            # Shin
            if pose[RIGHT_ANKLE][0] > 0:
                cv2.line(frame,
                         (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                         (int(pose[RIGHT_ANKLE][0] * w), int(pose[RIGHT_ANKLE][1] * h)),
                         pants_color, 12)
                cv2.line(frame,
                         (int(pose[RIGHT_KNEE][0] * w), int(pose[RIGHT_KNEE][1] * h)),
                         (int(pose[RIGHT_ANKLE][0] * w), int(pose[RIGHT_ANKLE][1] * h)),
                         outline_color, 2)

    # 1.5. Neck + bow tie, drawn after the torso so they sit on top of it.
    if len(pose) > max(LEFT_SHOULDER, RIGHT_SHOULDER):
        if pose[LEFT_SHOULDER][0] > 0 and pose[RIGHT_SHOULDER][0] > 0:
            # Neck is centered between the shoulders...
            neck_center_x = (pose[LEFT_SHOULDER][0] + pose[RIGHT_SHOULDER][0]) / 2 * w
            neck_center_y = (pose[LEFT_SHOULDER][1] + pose[RIGHT_SHOULDER][1]) / 2 * h

            # ...and shifted slightly above the shoulder line.
            neck_y = neck_center_y - 15

            # Width scales with shoulder distance; height is fixed.
            shoulder_distance = abs(pose[LEFT_SHOULDER][0] - pose[RIGHT_SHOULDER][0]) * w
            neck_width = shoulder_distance * 0.3  # ~30% of shoulder width
            neck_height = 25

            # Rectangle corners for the neck.
            neck_left = int(neck_center_x - neck_width / 2)
            neck_right = int(neck_center_x + neck_width / 2)
            neck_top = int(neck_y - neck_height / 2)
            neck_bottom = int(neck_y + neck_height / 2)

            # Filled neck with outline.
            cv2.rectangle(frame, (neck_left, neck_top), (neck_right, neck_bottom), skin_color, -1)
            cv2.rectangle(frame, (neck_left, neck_top), (neck_right, neck_bottom), outline_color, 2)

            # Bow tie just below the neck: two wing polygons plus a knot.
            bow_center_x = int(neck_center_x)
            bow_center_y = int(neck_y + neck_height / 2 + 5)
            bow_width = 20
            bow_height = 12

            # Left wing
            left_bow_points = np.array([
                [bow_center_x - bow_width//2, bow_center_y - bow_height//2],
                [bow_center_x - bow_width//2 - 8, bow_center_y],
                [bow_center_x - bow_width//2, bow_center_y + bow_height//2],
                [bow_center_x - 2, bow_center_y + bow_height//2],
                [bow_center_x - 2, bow_center_y - bow_height//2]
            ], np.int32)
            cv2.fillPoly(frame, [left_bow_points], bow_tie_color)
            cv2.polylines(frame, [left_bow_points], True, outline_color, 1)

            # Right wing
            right_bow_points = np.array([
                [bow_center_x + bow_width//2, bow_center_y - bow_height//2],
                [bow_center_x + bow_width//2 + 8, bow_center_y],
                [bow_center_x + bow_width//2, bow_center_y + bow_height//2],
                [bow_center_x + 2, bow_center_y + bow_height//2],
                [bow_center_x + 2, bow_center_y - bow_height//2]
            ], np.int32)
            cv2.fillPoly(frame, [right_bow_points], bow_tie_color)
            cv2.polylines(frame, [right_bow_points], True, outline_color, 1)

            # Center knot
            knot_points = np.array([
                [bow_center_x - 2, bow_center_y - 3],
                [bow_center_x + 2, bow_center_y - 3],
                [bow_center_x + 2, bow_center_y + 3],
                [bow_center_x - 2, bow_center_y + 3]
            ], np.int32)
            cv2.fillPoly(frame, [knot_points], bow_tie_color)
            cv2.polylines(frame, [knot_points], True, outline_color, 1)

    # 5. Hands, drawn last so they always appear in front of the body.
    for hand, hand_color in [(left_hand, (255, 0, 0)), (right_hand, (0, 0, 255))]:
        if np.any(hand != 0):  # all-zeros means the hand was not detected
            # Palm: convex hull over the wrist and the base of each finger.
            palm_points = []
            palm_indices = [0, 5, 9, 13, 17]  # wrist + finger bases
            for idx in palm_indices:
                if idx < len(hand) and hand[idx][0] > 0 and hand[idx][1] > 0:
                    palm_points.append([int(hand[idx][0] * w), int(hand[idx][1] * h)])

            if len(palm_points) > 3:
                palm_points = np.array(palm_points, np.int32)
                hull = cv2.convexHull(palm_points)
                cv2.fillPoly(frame, [hull], (255, 182, 193))  # palm fill (BGR)
                cv2.polylines(frame, [hull], True, outline_color, 2)

            # Finger bones: thick fill plus thin outline per connection.
            for connection in finger_connections:
                start_idx, end_idx = connection
                if (start_idx < len(hand) and end_idx < len(hand) and
                    hand[start_idx][0] > 0 and hand[start_idx][1] > 0 and
                    hand[end_idx][0] > 0 and hand[end_idx][1] > 0):

                    start_point = (int(hand[start_idx][0] * w), int(hand[start_idx][1] * h))
                    end_point = (int(hand[end_idx][0] * w), int(hand[end_idx][1] * h))

                    cv2.line(frame, start_point, end_point, (255, 182, 193), 9)  # bone fill (BGR)
                    cv2.line(frame, start_point, end_point, outline_color, 1)

            # Fingertips get an emphasized dot with a highlight.
            finger_tips = [THUMB_TIP, INDEX_TIP, MIDDLE_TIP, RING_TIP, PINKY_TIP]
            for tip_idx in finger_tips:
                if tip_idx < len(hand) and hand[tip_idx][0] > 0 and hand[tip_idx][1] > 0:
                    tip_x, tip_y = int(hand[tip_idx][0] * w), int(hand[tip_idx][1] * h)
                    cv2.circle(frame, (tip_x, tip_y), 4, (255, 182, 193), -1)  # tip fill (BGR)
                    cv2.circle(frame, (tip_x, tip_y), 4, outline_color, 2)
                    # Small highlight
                    cv2.circle(frame, (tip_x-1, tip_y-1), 1, (255, 255, 255), -1)

            # Debug-style keypoint dots: left hand blue-channel, right hand
            # red-channel, tips drawn slightly larger.
            for i, (x, y) in enumerate(hand):
                if x > 0 and y > 0:
                    point_x, point_y = int(x * w), int(y * h)
                    if i in finger_tips:
                        cv2.circle(frame, (point_x, point_y), 2, hand_color, -1)
                    else:
                        cv2.circle(frame, (point_x, point_y), 1, hand_color, -1)

    return frame
def interpolate_keypoints(kptsA, kptsB, steps):
    """Linearly interpolate between two keypoint frames.

    Args:
        kptsA: (pose, left_hand, right_hand) tuple for the starting frame.
        kptsB: Same structure for the ending frame.
        steps (int): Number of intermediate frames to generate.

    Returns:
        list[tuple]: `steps` interpolated (pose, left_hand, right_hand)
        tuples. The pose is always blended. A hand is blended only when it
        is detected (any non-zero coordinate) in BOTH endpoint frames;
        otherwise the detected side's position is used unchanged, or a
        zero array when neither side detected the hand.
    """
    poseA, leftA, rightA = kptsA
    poseB, leftB, rightB = kptsB

    # Detection flags don't change across steps — compute them once,
    # outside the loop. (Also removed leftover debug prints.)
    leftA_detected = np.any(leftA != 0)
    rightA_detected = np.any(rightA != 0)
    leftB_detected = np.any(leftB != 0)
    rightB_detected = np.any(rightB != 0)

    frames = []
    for t in range(1, steps + 1):
        alpha = t / (steps + 1)
        interp_pose = (1 - alpha) * poseA + alpha * poseB

        # Left hand: blend only when both endpoints have a detection.
        if leftA_detected and leftB_detected:
            interp_left = (1 - alpha) * leftA + alpha * leftB
        elif leftA_detected:
            interp_left = leftA  # hold the last known position
        elif leftB_detected:
            interp_left = leftB  # snap to the upcoming position
        else:
            interp_left = np.zeros((21, 2))  # no hand detected

        # Right hand: same policy.
        if rightA_detected and rightB_detected:
            interp_right = (1 - alpha) * rightA + alpha * rightB
        elif rightA_detected:
            interp_right = rightA
        elif rightB_detected:
            interp_right = rightB
        else:
            interp_right = np.zeros((21, 2))

        frames.append((interp_pose, interp_left, interp_right))
    return frames
def create_stitched_video(videoA_path, videoB_path, output_path="stitched_output.mp4",
                          fps=30.0, frame_size=(1280, 720), transition_steps=15):
    """Render two videos as stitched character animation with a transition.

    Extracts keypoints from both inputs, renders video A, an interpolated
    transition, then video B, onto white frames written to `output_path`.

    Args:
        videoA_path (str): Path to the first input video.
        videoB_path (str): Path to the second input video.
        output_path (str): Path of the MP4 file to write.
        fps (float): Output frame rate (default 30, as before).
        frame_size (tuple): Output (width, height) (default 1280x720).
        transition_steps (int): Frames interpolated between the clips.
    """
    videoA_keypoints = extract_keypoints_from_video(videoA_path)
    videoB_keypoints = extract_keypoints_from_video(videoB_path)

    width, height = frame_size
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    def _write_frames(keypoint_frames):
        # Render each keypoint frame onto a fresh white canvas.
        for pose, l, r in keypoint_frames:
            canvas = np.ones((height, width, 3), dtype=np.uint8) * 255
            out.write(render_person(canvas, pose, l, r))

    try:
        # Original clip A
        _write_frames(videoA_keypoints)

        # Transition A -> B. Guard against empty keypoint lists (a video
        # with no detected poses would otherwise crash on [-1] / [0]).
        if videoA_keypoints and videoB_keypoints:
            _write_frames(interpolate_keypoints(videoA_keypoints[-1],
                                                videoB_keypoints[0],
                                                steps=transition_steps))

        # Original clip B
        _write_frames(videoB_keypoints)
    finally:
        # Finalize the container even if rendering fails partway.
        out.release()

    print(f"Video saved to {output_path}")
def create_multi_stitched_video(video_paths, output_path="multi_stitched_output.mp4",
                                transition_steps=15, fps=30.0, frame_size=(1280, 720)):
    """Create a stitched character video from multiple video files.

    Each input is reduced to keypoints, rendered onto white frames, and
    written back-to-back with interpolated transitions between clips.

    Args:
        video_paths (list): List of paths to MP4 video files.
        output_path (str): Output path for the final video.
        transition_steps (int): Number of frames for transitions between videos.
        fps (float): Output frame rate (default 30, as before).
        frame_size (tuple): Output (width, height) (default 1280x720).
    """
    if len(video_paths) < 2:
        print("Need at least 2 videos to stitch together!")
        return

    print(f"Processing {len(video_paths)} videos...")

    # Extract keypoints from every input up front.
    all_keypoints = []
    for i, video_path in enumerate(video_paths):
        print(f"Extracting keypoints from video {i+1}/{len(video_paths)}: {video_path}")
        keypoints = extract_keypoints_from_video(video_path)
        all_keypoints.append(keypoints)
        print(f"  - Extracted {len(keypoints)} frames")

    width, height = frame_size
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    total_frames = 0

    def _write(keypoint_frames):
        # Render each keypoint frame onto a fresh white canvas and count it.
        nonlocal total_frames
        for pose, l, r in keypoint_frames:
            canvas = np.ones((height, width, 3), dtype=np.uint8) * 255
            out.write(render_person(canvas, pose, l, r))
            total_frames += 1

    try:
        for i, keypoints in enumerate(all_keypoints):
            print(f"Rendering video {i+1}/{len(all_keypoints)}...")
            _write(keypoints)

            # Transition into the next clip (skipped after the last one,
            # and when either clip yielded no usable frames — previously
            # an empty list crashed on [-1] / [0]).
            if i < len(all_keypoints) - 1:
                next_keypoints = all_keypoints[i + 1]
                if keypoints and next_keypoints:
                    print(f"  Adding transition to next video...")
                    _write(interpolate_keypoints(keypoints[-1], next_keypoints[0],
                                                 steps=transition_steps))
    finally:
        # Finalize the container even if rendering fails partway.
        out.release()

    print(f"Multi-stitched video saved to {output_path}")
    print(f"Total frames rendered: {total_frames}")
    print(f"Video duration: {total_frames/fps:.2f} seconds")
if __name__ == "__main__":
    # Demo entry point: stitch several clips into one rendered video.
    demo_clips = (
        "/Users/ethantam/desktop/35304.mp4",
        "/Users/ethantam/desktop/23978.mp4",
        "/Users/ethantam/desktop/23106.mp4",
        # Append more clip paths here as needed.
    )

    create_multi_stitched_video(list(demo_clips), "multi_stitched_output_1.mp4")

    # Two-clip variant:
    # create_stitched_video("/Users/ethantam/desktop/35304.mp4", "/Users/ethantam/desktop/23978.mp4")
videos_rows.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|