import re
from collections import Counter

import docx
import matplotlib.pyplot as plt
import pandas as pd
import PyPDF2
import spacy
import streamlit as st
from googletrans import Translator
|
|
| |
# Module-level singletons: the spaCy English pipeline and the googletrans
# client are created once at import time and reused across Streamlit reruns.
nlp = spacy.load("en_core_web_sm")
translator = Translator()


# Page chrome; set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="AI NVivo Coding App", layout="wide")
st.title("π§ AI-Powered NVivo App (Text Analysis + Coding)")
st.markdown("Upload files or input captions manually. Analyze & code your qualitative data automatically!")
|
|
| |
| |
| |
def extract_text_from_docx(uploaded_file):
    """Return the text of a .docx file-like object, one paragraph per line."""
    document = docx.Document(uploaded_file)
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    return "\n".join(paragraph_texts)
|
|
def extract_text_from_pdf(uploaded_file):
    """Concatenate the extracted text of every page of a PDF file-like object.

    PyPDF2's ``Page.extract_text()`` may yield ``None`` for pages with no
    extractable text (e.g. scanned images); those are treated as empty
    strings so the join cannot raise ``TypeError``.
    """
    reader = PyPDF2.PdfReader(uploaded_file)
    return "".join(page.extract_text() or "" for page in reader.pages)
|
|
def extract_text_from_excel(uploaded_file):
    """Flatten an Excel sheet to text: one line per row, cells space-separated."""
    frame = pd.read_excel(uploaded_file)
    row_lines = frame.astype(str).agg(" ".join, axis=1)
    return "\n".join(row_lines)
|
|
| |
| |
| |
def translate_text(text):
    """Translate *text* into English; the source language is auto-detected."""
    result = translator.translate(text, src='auto', dest='en')
    return result.text
|
|
def analyze_text(text):
    """Run spaCy NER over *text* and attach a keyword-based sentiment label.

    Returns:
        (entities, sentiment): entities is a list of (text, label) pairs;
        sentiment is "Positive" or "Negative".

    NOTE(review): sentiment is a placeholder heuristic — any text that does
    not contain the substring "good" is labelled Negative.
    """
    parsed = nlp(text)
    entity_pairs = [(entity.text, entity.label_) for entity in parsed.ents]
    mood = "Negative"
    if "good" in text.lower():
        mood = "Positive"
    return entity_pairs, mood
|
|
def extract_keywords(text, top_n=10):
    """Return the *top_n* most frequent alphabetic words longer than 3 chars.

    Tokenizes with a regex so that trailing punctuation ("rights," /
    "march!") no longer causes words to be dropped — the previous
    ``split()`` + ``isalpha()`` approach silently discarded any word with
    punctuation attached, badly undercounting frequencies.

    Args:
        text: Free text to analyze.
        top_n: Maximum number of (word, count) pairs to return.

    Returns:
        List of (word, count) tuples, most frequent first.
    """
    tokens = re.findall(r"[a-z]+", text.lower())
    counts = Counter(token for token in tokens if len(token) > 3)
    return counts.most_common(top_n)
|
|
def plot_keywords(keywords):
    """Render a horizontal bar chart of (word, count) pairs in Streamlit.

    Skips rendering (with a notice) when *keywords* is empty — previously
    ``zip(*keywords)`` raised ValueError on an empty list.
    """
    if not keywords:
        st.info("No keywords to plot.")
        return
    words, counts = zip(*keywords)
    fig, ax = plt.subplots()
    ax.barh(words, counts)
    ax.set_xlabel('Frequency')
    ax.set_title("Top Keywords")
    st.pyplot(fig)
    plt.close(fig)  # release the figure so repeated reruns don't leak memory
|
|
def auto_code_text(text):
    """Assign thematic codes to *text* by whole-word keyword matching.

    The previous implementation used substring containment, so e.g. the
    keyword "all" matched inside "finally" and "class" inside "classic".
    Matching keywords against the set of whole words in the text removes
    those false positives.

    Returns:
        List of matched theme names (in fixed theme order), or
        ["uncategorized"] if nothing matches.
    """
    themes = {
        "activism": ["march", "protest", "rights", "resist"],
        "intersectionality": ["women", "lgbt", "race", "class"],
        "call_to_action": ["join", "support", "attend", "speak"],
        "strategic_framing": ["narrative", "frame", "message"],
        "inclusivity": ["diverse", "all", "together", "inclusion"],
    }
    words = set(re.findall(r"[a-z]+", text.lower()))
    codes = [code for code, keywords in themes.items()
             if words.intersection(keywords)]
    return codes if codes else ["uncategorized"]
|
|
| |
| |
| |
uploaded_file = st.file_uploader("π Upload a file", type=["docx", "pdf", "xlsx"])


if uploaded_file:
    # Dispatch on the case-insensitive extension. The uploader's `type`
    # filter should guarantee one of these, but the previous code compared
    # case-sensitively and had no else-branch, so a surprise extension
    # (e.g. "Report.DOCX") left `raw_text` undefined and raised NameError.
    ext = uploaded_file.name.split('.')[-1].lower()
    if ext == 'docx':
        raw_text = extract_text_from_docx(uploaded_file)
    elif ext == 'pdf':
        raw_text = extract_text_from_pdf(uploaded_file)
    elif ext == 'xlsx':
        raw_text = extract_text_from_excel(uploaded_file)
    else:
        st.error("Unsupported file type: " + ext)
        st.stop()

    st.subheader("π Extracted Text")
    st.text_area("Raw Text", raw_text, height=150)

    # Translate once, then run every analysis step on the English text.
    translated_text = translate_text(raw_text)
    st.subheader("π Translated to English")
    st.text_area("Translated Text", translated_text, height=150)

    entities, sentiment = analyze_text(translated_text)
    st.subheader("π§ Named Entities")
    st.write(entities)
    st.markdown(f"**Sentiment:** {sentiment}")

    keywords = extract_keywords(translated_text)
    st.subheader("π Top Keywords")
    st.write(keywords)
    plot_keywords(keywords)

    st.subheader("π·οΈ Auto Codes for Full Document")
    codes = auto_code_text(translated_text)
    st.write(f"Detected Codes: {', '.join(codes)}")
|
|
| |
| |
| |
# --- Manual caption entry: same analysis pipeline, applied to typed text ---
st.markdown("---")
st.subheader("βοΈ Manually Enter Captions")
manual_input = st.text_area("Enter caption text here...", height=120)


if manual_input:
    # Translate first so all downstream analysis runs on English text.
    translated = translate_text(manual_input)
    st.write("**Translated:**", translated)


    entities, sentiment = analyze_text(translated)
    st.write("**Entities:**", entities)
    st.write("**Sentiment:**", sentiment)


    keywords = extract_keywords(translated)
    st.write("**Keywords:**", keywords)
    plot_keywords(keywords)


    codes = auto_code_text(translated)
    st.success(f"Auto-Coded Themes: {', '.join(codes)}")


    # The optional manual tag is appended after the success banner above,
    # so that banner reflects only the automatically detected codes; the
    # final dict below includes the manual tag as well.
    manual_tag = st.text_input("β Manually Add a Code (Optional)")
    if manual_tag:
        codes.append(manual_tag)


    st.write("π Final Coding for Caption:")
    st.write({
        "caption": manual_input,
        "translated": translated,
        "codes": codes
    })
|
|