Spaces:

ahm14
/

NVIVO

Build error

App Files Files Community

NVIVO / app.py

ahm14

Update app.py

fb6aefd verified about 1 year ago

raw

history blame contribute delete

4.58 kB

	import streamlit as st
	import docx
	import PyPDF2
	import pandas as pd
	from collections import Counter
	import matplotlib.pyplot as plt
	from googletrans import Translator
	import spacy

	# Load English NLP model
	# Module-level spaCy pipeline; analyze_text() runs every text through it.
	nlp = spacy.load("en_core_web_sm")
	# Module-level googletrans client; translate_text() uses it for every request.
	translator = Translator()

	# Page chrome: browser-tab title, wide layout, headline and a short intro.
	# NOTE(review): Streamlit expects set_page_config to be called before other
	# st.* commands, so keep it ahead of st.title/st.markdown - confirm against
	# the Streamlit version pinned for this Space.
	st.set_page_config(page_title="AI NVivo Coding App", layout="wide")
	st.title("🧠 AI-Powered NVivo App (Text Analysis + Coding)")
	st.markdown("Upload files or input captions manually. Analyze & code your qualitative data automatically!")

	# ----------------------------
	# Text Extraction Functions
	# ----------------------------
	def extract_text_from_docx(uploaded_file):
	    """Return the paragraph text of a Word document, one paragraph per line.

	    Args:
	        uploaded_file: A path or file-like object that ``docx.Document``
	            can open.

	    Returns:
	        The text of each paragraph in document order, joined by newlines.
	    """
	    document = docx.Document(uploaded_file)
	    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
	    return "\n".join(paragraph_texts)

	def extract_text_from_pdf(uploaded_file):
	    """Return the text of a PDF, one page per line group.

	    Args:
	        uploaded_file: A path or file-like object that ``PyPDF2.PdfReader``
	            can open.

	    Returns:
	        The extracted text of every page in order, joined by newlines.
	        Pages that yield no text contribute an empty string.
	    """
	    reader = PyPDF2.PdfReader(uploaded_file)
	    # ``or ""`` keeps the join safe if a page yields no text at all
	    # (e.g. an image-only page).
	    page_texts = [page.extract_text() or "" for page in reader.pages]
	    # Join with a newline, like the other extractors, so the last word of one
	    # page is not fused with the first word of the next.
	    return "\n".join(page_texts)

	def extract_text_from_excel(uploaded_file):
	    """Return the cell contents of a spreadsheet as text, one row per line.

	    Args:
	        uploaded_file: A path or file-like object that ``pd.read_excel``
	            can open.

	    Returns:
	        One line per data row, with the row's non-empty cells separated by
	        single spaces. A sheet with no data rows yields an empty string.
	    """
	    df = pd.read_excel(uploaded_file)
	    # Blank out missing cells: a plain ``astype(str)`` would turn them into
	    # the literal word "nan", which then leaks into the analysed text.
	    cells = df.astype(str).where(df.notna(), "")
	    row_lines = (
	        " ".join(cell for cell in row if cell)
	        for row in cells.itertuples(index=False, name=None)
	    )
	    return "\n".join(row_lines)

	# ----------------------------
	# NLP + AI Analysis
	# ----------------------------
	def translate_text(text):
	    """Translate *text* to English, auto-detecting the source language.

	    Args:
	        text: The text to translate.

	    Returns:
	        The English translation, or *text* unchanged when it is empty or
	        contains only whitespace.
	    """
	    # Nothing to translate: skip the network round-trip, which is wasted on
	    # blank input and may fail outright (e.g. text extracted from a scanned,
	    # image-only PDF).
	    if not text or not text.strip():
	        return text
	    translated = translator.translate(text, src='auto', dest='en')
	    return translated.text

	def analyze_text(text):
	    """Extract named entities and a coarse sentiment label from *text*.

	    Sentiment is a simple cue-word count over the tokens of the parsed
	    document: whole-word matches only, so "goodbye" no longer counts as
	    "good", and text without any cue word is "Neutral" rather than
	    "Negative".

	    Args:
	        text: English text to analyse.

	    Returns:
	        A ``(entities, sentiment)`` tuple where ``entities`` is a list of
	        ``(entity_text, entity_label)`` pairs and ``sentiment`` is one of
	        ``"Positive"``, ``"Negative"`` or ``"Neutral"``.
	    """
	    # NOTE(review): small illustrative cue lists - extend or replace with a
	    # proper sentiment model if finer-grained results are needed.
	    positive_words = {
	        "good", "great", "excellent", "wonderful", "amazing",
	        "happy", "love", "best", "positive",
	    }
	    negative_words = {
	        "bad", "terrible", "awful", "horrible", "worst",
	        "sad", "hate", "angry", "negative",
	    }

	    doc = nlp(text)
	    entities = [(ent.text, ent.label_) for ent in doc.ents]

	    tokens = [token.lower_ for token in doc]
	    positive_hits = sum(token in positive_words for token in tokens)
	    negative_hits = sum(token in negative_words for token in tokens)

	    if positive_hits > negative_hits:
	        sentiment = "Positive"
	    elif negative_hits > positive_hits:
	        sentiment = "Negative"
	    else:
	        sentiment = "Neutral"
	    return entities, sentiment

	def extract_keywords(text, top_n=10):
	    """Return the most frequent words in *text*.

	    Words are split on whitespace, stripped of surrounding punctuation and
	    lower-cased. Only purely alphabetic words longer than three characters
	    are counted.

	    Args:
	        text: The text to scan.
	        top_n: Maximum number of keywords to return.

	    Returns:
	        A list of ``(word, count)`` pairs, most frequent first. Empty when
	        no word qualifies.
	    """
	    import string

	    # Strip punctuation hugging a word ("rights," / "march." / "(protest)");
	    # otherwise isalpha() rejects the whole token and the word is never
	    # counted. Typographic quotes and dashes are included alongside ASCII.
	    strip_chars = string.punctuation + "“”‘’…–—"
	    cleaned = (token.strip(strip_chars).lower() for token in text.split())
	    words = [word for word in cleaned if len(word) > 3 and word.isalpha()]
	    return Counter(words).most_common(top_n)

	def plot_keywords(keywords):
	    """Render a horizontal bar chart of keyword frequencies in the app.

	    Args:
	        keywords: A sequence of ``(word, count)`` pairs, as returned by
	            ``Counter.most_common``.

	    Returns:
	        None. Nothing is drawn when *keywords* is empty.
	    """
	    # zip(*[]) yields nothing to unpack, so an empty list (e.g. a caption
	    # with no word longer than three letters) would raise ValueError.
	    if not keywords:
	        return
	    words, counts = zip(*keywords)
	    fig, ax = plt.subplots()
	    ax.barh(words, counts)
	    ax.set_xlabel('Frequency')
	    ax.set_title("Top Keywords")
	    st.pyplot(fig)
	    # pyplot keeps every figure it creates registered until closed; release
	    # it so figures do not pile up across Streamlit reruns.
	    plt.close(fig)

	def auto_code_text(text):
	    """Assign thematic codes to *text* based on keyword cues.

	    A theme applies when any of its keywords starts a word in the text, so
	    inflected forms still match ("march" -> "marching", "protest" ->
	    "protests"). Keywords of three letters or fewer must match a whole word,
	    which stops "all" from firing on "allow". Anchoring at the word start
	    removes the false positives of plain substring matching ("all" in
	    "call", "race" in "embrace").

	    Args:
	        text: The text to code. Matching is case-insensitive.

	    Returns:
	        A list of matching theme names in definition order, or
	        ``["uncategorized"]`` when no theme matches.
	    """
	    import re

	    themes = {
	        "activism": ["march", "protest", "rights", "resist"],
	        "intersectionality": ["women", "lgbt", "race", "class"],
	        "call_to_action": ["join", "support", "attend", "speak"],
	        "strategic_framing": ["narrative", "frame", "message"],
	        "inclusivity": ["diverse", "all", "together", "inclusion"],
	    }

	    def _keyword_pattern(keyword):
	        # Always anchor at a word start; very short keywords also need a
	        # word end because almost any suffix turns them into another word.
	        tail = r"\b" if len(keyword) <= 3 else ""
	        return r"\b" + re.escape(keyword) + tail

	    lowered = text.lower()  # lower-case once instead of once per keyword
	    codes = [
	        code
	        for code, keywords in themes.items()
	        if any(re.search(_keyword_pattern(word), lowered) for word in keywords)
	    ]
	    return codes if codes else ["uncategorized"]

	# ----------------------------
	# File Upload
	# ----------------------------
	# Upload flow: extract text from the file, translate it to English, then
	# show entities, sentiment, keywords and automatic theme codes.
	uploaded_file = st.file_uploader("📂 Upload a file", type=["docx", "pdf", "xlsx"])

	if uploaded_file:
	    # Compare the extension case-insensitively so "REPORT.PDF" is handled;
	    # a case-sensitive match would leave raw_text undefined and crash below.
	    ext = uploaded_file.name.rsplit('.', 1)[-1].lower()
	    if ext == 'docx':
	        raw_text = extract_text_from_docx(uploaded_file)
	    elif ext == 'pdf':
	        raw_text = extract_text_from_pdf(uploaded_file)
	    elif ext == 'xlsx':
	        raw_text = extract_text_from_excel(uploaded_file)
	    else:
	        raw_text = None

	    if raw_text is None:
	        st.error(f"Unsupported file type: {uploaded_file.name}")
	    elif not raw_text.strip():
	        # e.g. a scanned PDF or an empty sheet: there is nothing to analyse.
	        st.warning("No text could be extracted from this file.")
	    else:
	        st.subheader("📄 Extracted Text")
	        st.text_area("Raw Text", raw_text, height=150)

	        translated_text = translate_text(raw_text)
	        st.subheader("🌍 Translated to English")
	        st.text_area("Translated Text", translated_text, height=150)

	        entities, sentiment = analyze_text(translated_text)
	        st.subheader("🧠 Named Entities")
	        st.write(entities)
	        st.markdown(f"Sentiment: {sentiment}")

	        keywords = extract_keywords(translated_text)
	        st.subheader("🔑 Top Keywords")
	        st.write(keywords)
	        # An empty keyword list cannot be charted.
	        if keywords:
	            plot_keywords(keywords)

	        st.subheader("🏷️ Auto Codes for Full Document")
	        codes = auto_code_text(translated_text)
	        st.write(f"Detected Codes: {', '.join(codes)}")

	# ----------------------------
	# Manual Input
	# ----------------------------
	# Manual flow: analyse a caption typed by the user and let them append
	# their own code to the automatically detected themes.
	st.markdown("---")
	st.subheader("✍️ Manually Enter Captions")
	manual_input = st.text_area("Enter caption text here...", height=120)

	# Ignore whitespace-only input: there is nothing to translate or analyse.
	if manual_input and manual_input.strip():
	    translated = translate_text(manual_input)
	    st.write("Translated:", translated)

	    entities, sentiment = analyze_text(translated)
	    st.write("Entities:", entities)
	    st.write("Sentiment:", sentiment)

	    keywords = extract_keywords(translated)
	    st.write("Keywords:", keywords)
	    # Short captions often yield no keywords; an empty list cannot be charted.
	    if keywords:
	        plot_keywords(keywords)

	    codes = auto_code_text(translated)
	    st.success(f"Auto-Coded Themes: {', '.join(codes)}")

	    manual_tag = st.text_input("➕ Manually Add a Code (Optional)")
	    # Skip blank tags and trim stray spaces so codes stay clean.
	    if manual_tag and manual_tag.strip():
	        codes.append(manual_tag.strip())

	    # Show final result
	    st.write("📌 Final Coding for Caption:")
	    st.write({
	        "caption": manual_input,
	        "translated": translated,
	        "codes": codes
	    })