NVIVO / app.py
ahm14's picture
Update app.py
fb6aefd verified
import streamlit as st
import docx
import PyPDF2
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
from googletrans import Translator
import spacy
# Page configuration is issued before any other Streamlit call; Streamlit
# requires set_page_config() to be the first command of the script.
st.set_page_config(page_title="AI NVivo Coding App", layout="wide")


@st.cache_resource
def _load_nlp_model():
    """Load the English spaCy pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be loaded from disk each time.
    """
    return spacy.load("en_core_web_sm")


# Shared NLP model and translation client used by the analysis helpers below.
nlp = _load_nlp_model()
translator = Translator()

st.title("🧠 AI-Powered NVivo App (Text Analysis + Coding)")
st.markdown("Upload files or input captions manually. Analyze & code your qualitative data automatically!")
# ----------------------------
# Text Extraction Functions
# ----------------------------
def extract_text_from_docx(uploaded_file):
    """Return the text of a Word document, paragraph texts joined by newlines.

    Args:
        uploaded_file: A path or file-like object accepted by ``docx.Document``.

    Returns:
        A single string containing the text of every paragraph in the document.
    """
    document = docx.Document(uploaded_file)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        uploaded_file: A path or file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The extracted page texts joined by newlines. Pages are separated so
        the last word of one page is not fused with the first word of the
        next (which would corrupt keyword counts downstream).
    """
    reader = PyPDF2.PdfReader(uploaded_file)
    # `or ""` keeps the join safe should a page yield no text at all
    # (e.g. an image-only page).
    return "\n".join(page.extract_text() or "" for page in reader.pages)
def extract_text_from_excel(uploaded_file):
    """Return the cell contents of a spreadsheet as plain text.

    The workbook is parsed by ``pd.read_excel`` with its default options.
    Each data row becomes one line, with its non-empty cells separated by a
    single space.

    Args:
        uploaded_file: A path or file-like object accepted by
            ``pd.read_excel``.

    Returns:
        One line of text per data row; an empty string when there are no rows.
    """
    df = pd.read_excel(uploaded_file)
    # Blank out missing cells so they do not show up as the literal text
    # "nan" (or break the join when they remain non-string missing values).
    cells = df.astype(str).where(df.notna(), "")
    return "\n".join(
        " ".join(filter(None, row))
        for row in cells.itertuples(index=False, name=None)
    )
# ----------------------------
# NLP + AI Analysis
# ----------------------------
def translate_text(text):
    """Translate *text* to English using the module-level translator.

    Args:
        text: Text in any language; the source language is auto-detected.

    Returns:
        The English translation. Blank input (``None``, empty, or
        whitespace-only) is returned as-is (``None`` becomes ``""``) without
        contacting the translation service.
    """
    # Nothing to translate: skip the remote call rather than rely on how the
    # translation backend treats empty input.
    if not text or not text.strip():
        return text or ""
    translated = translator.translate(text, src='auto', dest='en')
    return translated.text
def analyze_text(text):
    """Extract named entities and a coarse sentiment label from *text*.

    Args:
        text: The text to analyse.

    Returns:
        A ``(entities, sentiment)`` tuple. ``entities`` is a list of
        ``(entity_text, entity_label)`` pairs from the module-level ``nlp``
        pipeline. ``sentiment`` is ``"Positive"`` when the substring "good"
        occurs anywhere in the lower-cased text and ``"Negative"`` otherwise.
    """
    parsed = nlp(text)
    entities = []
    for entity in parsed.ents:
        entities.append((entity.text, entity.label_))

    # NOTE(review): this is a single-keyword heuristic, not a sentiment
    # model; text without "good" is always labelled "Negative".
    if "good" in text.lower():
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    return entities, sentiment
def extract_keywords(text, top_n=10):
    """Return the most frequent words in *text*.

    Tokens are split on whitespace, lower-cased, and stripped of leading and
    trailing ASCII punctuation, so "Rights." and "rights" count as the same
    word. Only purely alphabetic words longer than three characters are kept.

    Args:
        text: The text to scan.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of ``(word, count)`` pairs, most frequent first.
    """
    import string

    # Strip punctuation before the isalpha() filter; otherwise any word next
    # to a comma, period, quote, etc. would be silently discarded.
    tokens = (token.strip(string.punctuation) for token in text.lower().split())
    words = [word for word in tokens if len(word) > 3 and word.isalpha()]
    return Counter(words).most_common(top_n)
def plot_keywords(keywords):
    """Render a horizontal bar chart of keyword frequencies in the app.

    Args:
        keywords: A list of ``(word, count)`` pairs, as returned by
            ``extract_keywords``. When empty, nothing is drawn.
    """
    # zip(*[]) yields nothing to unpack and would raise ValueError.
    if not keywords:
        return
    words, counts = zip(*keywords)
    fig, ax = plt.subplots()
    ax.barh(words, counts)
    ax.set_xlabel('Frequency')
    ax.set_title("Top Keywords")
    st.pyplot(fig)
    # Release the figure; pyplot keeps every open figure referenced, so
    # repeated script reruns would otherwise accumulate them.
    plt.close(fig)
def auto_code_text(text):
    """Assign thematic codes to *text* based on keyword stems.

    A theme applies when any of its keywords occurs at the start of a word in
    the lower-cased text, so "protest" matches "protests" and "protesting",
    while "all" no longer matches inside "call" and "race" no longer matches
    inside "embrace".

    Args:
        text: The text to code.

    Returns:
        The list of matching theme names in declaration order, or
        ``["uncategorized"]`` when no theme matches.
    """
    import re

    themes = {
        "activism": ["march", "protest", "rights", "resist"],
        "intersectionality": ["women", "lgbt", "race", "class"],
        "call_to_action": ["join", "support", "attend", "speak"],
        "strategic_framing": ["narrative", "frame", "message"],
        "inclusivity": ["diverse", "all", "together", "inclusion"]
    }
    # Lower-case once instead of once per keyword.
    lowered = text.lower()
    codes = [
        code
        for code, keywords in themes.items()
        if any(re.search(r"\b" + re.escape(word), lowered) for word in keywords)
    ]
    return codes if codes else ["uncategorized"]
# ----------------------------
# File Upload
# ----------------------------
# Upload flow: extract text from the file, translate it, then show entities,
# sentiment, keywords, and automatic theme codes.
uploaded_file = st.file_uploader("📂 Upload a file", type=["docx", "pdf", "xlsx"])
if uploaded_file:
    # Normalise the extension so "REPORT.PDF" is handled like "report.pdf".
    ext = uploaded_file.name.rsplit('.', 1)[-1].lower()
    extractors = {
        'docx': extract_text_from_docx,
        'pdf': extract_text_from_pdf,
        'xlsx': extract_text_from_excel,
    }
    extractor = extractors.get(ext)
    if extractor is None:
        # Without this branch raw_text would be unbound below (NameError).
        st.error(f"Unsupported file type: {uploaded_file.name}")
    else:
        raw_text = extractor(uploaded_file)
        st.subheader("📄 Extracted Text")
        st.text_area("Raw Text", raw_text, height=150)

        translated_text = translate_text(raw_text)
        st.subheader("🌍 Translated to English")
        st.text_area("Translated Text", translated_text, height=150)

        entities, sentiment = analyze_text(translated_text)
        st.subheader("🧠 Named Entities")
        st.write(entities)
        st.markdown(f"**Sentiment:** {sentiment}")

        keywords = extract_keywords(translated_text)
        st.subheader("🔑 Top Keywords")
        st.write(keywords)
        # Only plot when there is something to plot; an empty keyword list
        # cannot be unpacked into words/counts.
        if keywords:
            plot_keywords(keywords)

        st.subheader("🏷️ Auto Codes for Full Document")
        codes = auto_code_text(translated_text)
        st.write(f"Detected Codes: {', '.join(codes)}")
# ----------------------------
# Manual Input
# ----------------------------
# Manual flow: analyse a caption typed by the user and let them append one
# extra code of their own to the automatically detected ones.
st.markdown("---")
st.subheader("✍️ Manually Enter Captions")
manual_input = st.text_area("Enter caption text here...", height=120)
# Ignore whitespace-only input; there is nothing to translate or code.
if manual_input.strip():
    translated = translate_text(manual_input)
    st.write("**Translated:**", translated)

    entities, sentiment = analyze_text(translated)
    st.write("**Entities:**", entities)
    st.write("**Sentiment:**", sentiment)

    keywords = extract_keywords(translated)
    st.write("**Keywords:**", keywords)
    # Short captions may produce no keywords; an empty list cannot be
    # unpacked into words/counts for plotting.
    if keywords:
        plot_keywords(keywords)

    codes = auto_code_text(translated)
    st.success(f"Auto-Coded Themes: {', '.join(codes)}")

    manual_tag = st.text_input("➕ Manually Add a Code (Optional)").strip()
    # Skip blank tags and tags that were already detected automatically.
    if manual_tag and manual_tag not in codes:
        codes.append(manual_tag)

    # Show final result
    st.write("📌 Final Coding for Caption:")
    st.write({
        "caption": manual_input,
        "translated": translated,
        "codes": codes
    })