Delete app.py
app.py
DELETED
@@ -1,146 +0,0 @@
import torch
import pandas as pd
import streamlit as st
from PIL import Image

from encoder import EncoderCNN
from decoder import DecoderRNN
from utils.vocab import Vocabulary
from utils.helpers import VOCAB_PATH, CAPTIONS_PATH, IMAGE_DIR
from utils.transforms import transforms
from inference import sample_with_temp
import sacrebleu
import os
from huggingface_hub import hf_hub_download


@st.cache_resource
def load_models():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load captions and vocab
    captions = pd.read_csv(CAPTIONS_PATH)
    vocab = Vocabulary(load_path=VOCAB_PATH)

    # Initialize models
    encoder = EncoderCNN(256).to(device)
    decoder = DecoderRNN(len(vocab), 256, 512).to(device)

    # Download trained weights from the Hugging Face Hub
    repo_id = "Sher1988/image-classifier-weights"
    encoder_path = hf_hub_download(repo_id=repo_id, filename="encoder.pth")
    decoder_path = hf_hub_download(repo_id=repo_id, filename="decoder.pth")

    # Load weights
    encoder.load_state_dict(torch.load(encoder_path, map_location=device))
    decoder.load_state_dict(torch.load(decoder_path, map_location=device))

    encoder.eval()
    decoder.eval()

    return encoder, decoder, vocab, device, captions


# --- Sidebar configuration ---
st.sidebar.header("Select an Example Image")

if os.path.exists(IMAGE_DIR):
    available_images = [f for f in os.listdir(IMAGE_DIR) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    selected_img_name = st.sidebar.selectbox("Choose from Flickr8k:", ["None"] + available_images)

    # Preview thumbnail for the selected example
    if selected_img_name != "None":
        img_path = os.path.join(IMAGE_DIR, selected_img_name)
        st.sidebar.image(Image.open(img_path), caption="Sidebar Selection Preview", use_container_width=True)
else:
    st.sidebar.warning("Image directory not found. Please check the IMAGE_DIR path.")
    selected_img_name = "None"

# --- Main app logic ---
encoder, decoder, vocab, device, captions = load_models()
act_caps = []
caption = ''
st.title("📸 AI Image Captioner")

temp = st.slider("Sampling Temperature", min_value=0.0, max_value=0.8, value=0.1, step=0.1)
st.info("Higher temperature = more creative/random. Lower temperature = more predictable.")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])

# Determine which image to process: an upload takes priority over the sidebar pick
img = None
img_name = None

if uploaded_file is not None:
    img = Image.open(uploaded_file).convert('RGB')
    img_name = uploaded_file.name
elif selected_img_name != "None":
    img_path = os.path.join(IMAGE_DIR, selected_img_name)
    img = Image.open(img_path).convert('RGB')
    img_name = selected_img_name

# If we have an image (from either source), run the model
if img is not None:
    st.image(img, caption=f'Selected: {img_name}', width=300)

    # Preprocess with the same transforms used at training time
    img_tensor = transforms(img).unsqueeze(0).to(device)

    # Get ground-truth captions for the selected image name
    act_caps = captions[captions['image'] == img_name]['caption'].tolist()

    if act_caps:
        st.subheader("Actual Captions:")
        st.success("  \n".join(act_caps))
    else:
        st.info("No ground truth captions found for this image in the CSV.")

    with torch.no_grad():
        encoder_out = encoder(img_tensor)
        # Pass the 'temp' value from the slider through to the sampler
        caption = sample_with_temp(encoder_out, decoder, vocab, temp=temp)

    st.subheader("Generated Caption:")
    st.success(caption)

    if act_caps:
        # sacrebleu expects the hypotheses as a list of strings and the
        # references as a list of reference streams, each stream the same
        # length as the hypothesis list -- so each ground-truth caption
        # becomes its own single-element stream here.
        refs = [[c] for c in act_caps]
        sys = [caption]

        bleu = sacrebleu.corpus_bleu(sys, refs)

        st.subheader("Evaluation Metrics:")
        st.metric(label="SacreBLEU Score", value=f"{bleu.score:.2f}")
        st.progress(min(bleu.score / 50, 1.0))

        # N-gram precision breakdown; bleu.precisions is a list: [p1, p2, p3, p4]
        cols = st.columns(4)
        for i, p in enumerate(bleu.precisions):
            cols[i].markdown(f"{i+1}-gram")
            cols[i].write(f"{p:.1f}%")

        # Brief explanation
        with st.expander("What do these mean?"):
            st.write("""
            - **1-gram**: Individual word accuracy (vocabulary).
            - **2-gram**: Fluency of word pairs.
            - **4-gram**: Capturing longer phrases/sentence structure.
            """)
    else:
        st.info("Select an image from the Flickr8k set to see BLEU metrics.")

st.header('About this Project')
st.markdown("""
This AI model generates descriptive captions for uploaded images using a **ResNet50 + LSTM** architecture.

* **Encoder:** Pre-trained ResNet50 (frozen) extracts high-level visual features.
* **Decoder:** A Long Short-Term Memory (LSTM) network trained for 10 epochs.
* **Dataset:** Trained on the **Flickr8k dataset** (8,000 images).

⚠️ **Note:** Because the model was trained on a specific, small-scale dataset with a frozen backbone, it performs satisfactorily on outdoor scenes, people, and animals. It may produce unexpected results for images significantly different from the Flickr8k distribution.
""")
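
A note on the BLEU call in the deleted file: sacrebleu's corpus_bleu takes the hypotheses as a list of strings and the references as a list of reference streams, where every stream must have the same length as the hypothesis list. Scoring one generated caption against several ground-truth captions therefore means one single-element stream per ground truth. A minimal self-contained check (the caption strings are invented for illustration):

import sacrebleu

hypothesis = ["a dog runs across the grass"]   # one generated caption
ground_truth = [                               # made-up reference captions
    "a dog is running on the grass",
    "a brown dog runs through a field",
]

# One reference stream per ground-truth caption; each stream holds
# exactly one entry because there is exactly one hypothesis.
refs = [[c] for c in ground_truth]

bleu = sacrebleu.corpus_bleu(hypothesis, refs)
print(f"BLEU: {bleu.score:.2f}")   # corpus-level score
print(bleu.precisions)             # [p1, p2, p3, p4] n-gram precisions

Passing refs = [ground_truth] instead would be read as one stream of two sentences against a single hypothesis, which sacrebleu rejects with a length-mismatch error.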
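
inference.py is not part of this diff, so the exact behavior of sample_with_temp is unknown; the sketch below shows one plausible shape of temperature decoding for an LSTM captioner. The decoder attributes (lstm, fc, embedding), the vocab.itos lookup, and the special-token names are all assumptions, not the repository's actual interface:

import torch

def sample_with_temp(features, decoder, vocab, temp=0.1, max_len=20):
    # Hypothetical temperature-sampling loop; the repo's version may differ.
    states = None
    inputs = features.unsqueeze(1)                        # (1, 1, embed_size) image features
    words = []
    for _ in range(max_len):
        hiddens, states = decoder.lstm(inputs, states)    # assumed attribute name
        logits = decoder.fc(hiddens.squeeze(1))           # (1, vocab_size), assumed attribute name
        if temp <= 0:
            token = logits.argmax(dim=-1)                 # temp == 0 degenerates to greedy decoding
        else:
            probs = torch.softmax(logits / temp, dim=-1)  # low temp sharpens, high temp flattens
            token = torch.multinomial(probs, num_samples=1).squeeze(1)
        word = vocab.itos[token.item()]                   # assumed index-to-word mapping
        if word == "<eos>":                               # assumed end-of-sequence token
            break
        if word not in ("<sos>", "<pad>"):
            words.append(word)
        inputs = decoder.embedding(token).unsqueeze(1)    # feed the sampled token back in
    return " ".join(words)

This reading would also explain why the slider allows a temperature of 0.0: a sensible sampler treats it as greedy argmax decoding rather than dividing by zero.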
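
encoder.py is likewise absent from the diff. Given the About text (a frozen, pre-trained ResNet50 feeding a 256-dimensional embedding, matching the EncoderCNN(256) call), a typical encoder of this kind looks roughly as follows; the exact layer names and any normalization are guesses:

import torch
import torch.nn as nn
import torchvision.models as models

class EncoderCNN(nn.Module):
    # Illustrative frozen-ResNet50 encoder; the repository's encoder.py may differ.
    def __init__(self, embed_size):
        super().__init__()
        resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        for param in resnet.parameters():
            param.requires_grad = False          # frozen backbone, per the About section
        self.resnet = nn.Sequential(*list(resnet.children())[:-1])  # drop the classifier head
        self.embed = nn.Linear(resnet.fc.in_features, embed_size)   # project 2048 -> embed_size

    def forward(self, images):
        with torch.no_grad():
            features = self.resnet(images)       # (B, 2048, 1, 1)
        return self.embed(features.flatten(1))   # (B, embed_size)

Under this reading, only the final projection would have been trainable during the 10 epochs mentioned in the app, which is consistent with its note about limited performance outside the Flickr8k distribution.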