Spaces:

ahm14
/

NVIVO

Build error

App Files Community

ahm14 committed on Apr 11, 2025

Commit

fb6aefd

verified ·

1 Parent(s): 0bfd1a3

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -75

app.py CHANGED Viewed

@@ -6,110 +6,135 @@ from collections import Counter
 import matplotlib.pyplot as plt
 from googletrans import Translator
 import spacy
-from io import BytesIO
-# Load spaCy model for Named Entity Recognition and general NLP tasks
 nlp = spacy.load("en_core_web_sm")
-# File upload handler
-uploaded_file = st.file_uploader("Upload a document (DOCX, PDF, Excel)", type=["docx", "pdf", "xlsx"])
 def extract_text_from_docx(uploaded_file):
     doc = docx.Document(uploaded_file)
-    text = "\n".join([para.text for para in doc.paragraphs])
-    return text
 def extract_text_from_pdf(uploaded_file):
     reader = PyPDF2.PdfReader(uploaded_file)
-    text = ""
-    for page in reader.pages:
-        text += page.extract_text()
-    return text
 def extract_text_from_excel(uploaded_file):
     df = pd.read_excel(uploaded_file)
-    text = df.to_string()  # Combine all data into a single string
-    return text
-# AI-powered document analysis functions
 def analyze_text(text):
     doc = nlp(text)
-    named_entities = [(ent.text, ent.label_) for ent in doc.ents]
-    # Simple sentiment analysis (for demonstration)
     sentiment = "Positive" if "good" in text.lower() else "Negative"
-    return named_entities, sentiment
 def extract_keywords(text, top_n=10):
-    # Simple word count to extract top N frequent words (excluding stop words)
-    words = [word.lower() for word in text.split() if len(word) > 3]
     word_count = Counter(words)
-    most_common = word_count.most_common(top_n)
-    return most_common
 def plot_keywords(keywords):
     words, counts = zip(*keywords)
     fig, ax = plt.subplots()
     ax.barh(words, counts)
     ax.set_xlabel('Frequency')
-    ax.set_ylabel('Keywords')
-    plt.title("Top Keywords")
     st.pyplot(fig)
-# Multilingual support - translation
-def translate_text(text):
-    translator = Translator()
-    translated = translator.translate(text, src='auto', dest='en')
-    return translated.text
-# Display the UI components
-if uploaded_file is not None:
-    file_extension = uploaded_file.name.split('.')[-1].lower()
-    if file_extension == 'docx':
-        text = extract_text_from_docx(uploaded_file)
-    elif file_extension == 'pdf':
-        text = extract_text_from_pdf(uploaded_file)
-    elif file_extension == 'xlsx':
-        text = extract_text_from_excel(uploaded_file)
-    st.write("Document Text Preview:")
-    st.text_area("Extracted Text", text, height=200)
-    # Translate the document text to English if needed
-    translated_text = translate_text(text)
-    st.write("Translated Text (English):")
-    st.text_area("Translated Text", translated_text, height=200)
-    # Perform AI analysis on the document text
-    named_entities, sentiment = analyze_text(translated_text)
-    st.write("Named Entities Extracted:")
-    st.write(named_entities)
-    st.write(f"Sentiment: {sentiment}")
-    # Keyword extraction and visualization
     keywords = extract_keywords(translated_text)
-    st.write("Top Keywords:")
     st.write(keywords)
     plot_keywords(keywords)
-# Manual text input for captions
-user_input = st.text_area("Manually Input Captions")
-if user_input:
-    translated_input = translate_text(user_input)
-    st.write("Translated Input Text (English):")
-    st.text_area("Translated Input", translated_input, height=200)
-    # AI analysis on the manual input
-    named_entities_input, sentiment_input = analyze_text(translated_input)
-    st.write("Named Entities in Input Text:")
-    st.write(named_entities_input)
-    st.write(f"Sentiment: {sentiment_input}")
-    # Keyword extraction for manual input
-    keywords_input = extract_keywords(translated_input)
-    st.write("Top Keywords in Input:")
-    st.write(keywords_input)
-    plot_keywords(keywords_input)

 import matplotlib.pyplot as plt
 from googletrans import Translator
 import spacy
+# Load English NLP model
 nlp = spacy.load("en_core_web_sm")
+translator = Translator()
+st.set_page_config(page_title="AI NVivo Coding App", layout="wide")
+st.title("🧠 AI-Powered NVivo App (Text Analysis + Coding)")
+st.markdown("Upload files or input captions manually. Analyze & code your qualitative data automatically!")
+# ----------------------------
+# Text Extraction Functions
+# ----------------------------
 def extract_text_from_docx(uploaded_file):
     doc = docx.Document(uploaded_file)
+    return "\n".join([para.text for para in doc.paragraphs])
 def extract_text_from_pdf(uploaded_file):
     reader = PyPDF2.PdfReader(uploaded_file)
+    return "".join([page.extract_text() for page in reader.pages])
 def extract_text_from_excel(uploaded_file):
     df = pd.read_excel(uploaded_file)
+    return "\n".join(df.astype(str).apply(lambda x: " ".join(x), axis=1))
+# ----------------------------
+# NLP + AI Analysis
+# ----------------------------
+def translate_text(text):
+    translated = translator.translate(text, src='auto', dest='en')
+    return translated.text
 def analyze_text(text):
     doc = nlp(text)
+    entities = [(ent.text, ent.label_) for ent in doc.ents]
     sentiment = "Positive" if "good" in text.lower() else "Negative"
+    return entities, sentiment
 def extract_keywords(text, top_n=10):
+    words = [word.lower() for word in text.split() if len(word) > 3 and word.isalpha()]
     word_count = Counter(words)
+    return word_count.most_common(top_n)
 def plot_keywords(keywords):
     words, counts = zip(*keywords)
     fig, ax = plt.subplots()
     ax.barh(words, counts)
     ax.set_xlabel('Frequency')
+    ax.set_title("Top Keywords")
     st.pyplot(fig)
+def auto_code_text(text):
+    themes = {
+        "activism": ["march", "protest", "rights", "resist"],
+        "intersectionality": ["women", "lgbt", "race", "class"],
+        "call_to_action": ["join", "support", "attend", "speak"],
+        "strategic_framing": ["narrative", "frame", "message"],
+        "inclusivity": ["diverse", "all", "together", "inclusion"]
+    }
+    codes = []
+    for code, keywords in themes.items():
+        if any(word in text.lower() for word in keywords):
+            codes.append(code)
+    return codes if codes else ["uncategorized"]
+# ----------------------------
+# File Upload
+# ----------------------------
+uploaded_file = st.file_uploader("📂 Upload a file", type=["docx", "pdf", "xlsx"])
+if uploaded_file:
+    ext = uploaded_file.name.split('.')[-1]
+    if ext == 'docx':
+        raw_text = extract_text_from_docx(uploaded_file)
+    elif ext == 'pdf':
+        raw_text = extract_text_from_pdf(uploaded_file)
+    elif ext == 'xlsx':
+        raw_text = extract_text_from_excel(uploaded_file)
+    st.subheader("📄 Extracted Text")
+    st.text_area("Raw Text", raw_text, height=150)
+    translated_text = translate_text(raw_text)
+    st.subheader("🌍 Translated to English")
+    st.text_area("Translated Text", translated_text, height=150)
+    entities, sentiment = analyze_text(translated_text)
+    st.subheader("🧠 Named Entities")
+    st.write(entities)
+    st.markdown(f"**Sentiment:** {sentiment}")
     keywords = extract_keywords(translated_text)
+    st.subheader("🔑 Top Keywords")
     st.write(keywords)
     plot_keywords(keywords)
+    st.subheader("🏷️ Auto Codes for Full Document")
+    codes = auto_code_text(translated_text)
+    st.write(f"Detected Codes: {', '.join(codes)}")
+# ----------------------------
+# Manual Input
+# ----------------------------
+st.markdown("---")
+st.subheader("✍️ Manually Enter Captions")
+manual_input = st.text_area("Enter caption text here...", height=120)
+if manual_input:
+    translated = translate_text(manual_input)
+    st.write("**Translated:**", translated)
+    entities, sentiment = analyze_text(translated)
+    st.write("**Entities:**", entities)
+    st.write("**Sentiment:**", sentiment)
+    keywords = extract_keywords(translated)
+    st.write("**Keywords:**", keywords)
+    plot_keywords(keywords)
+    codes = auto_code_text(translated)
+    st.success(f"Auto-Coded Themes: {', '.join(codes)}")
+    manual_tag = st.text_input("➕ Manually Add a Code (Optional)")
+    if manual_tag:
+        codes.append(manual_tag)
+    # Show final result
+    st.write("📌 Final Coding for Caption:")
+    st.write({
+        "caption": manual_input,
+        "translated": translated,
+        "codes": codes
+    })