ahm14 commited on
Commit
fb6aefd
Β·
verified Β·
1 Parent(s): 0bfd1a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -75
app.py CHANGED
@@ -6,110 +6,135 @@ from collections import Counter
6
  import matplotlib.pyplot as plt
7
  from googletrans import Translator
8
  import spacy
9
- from io import BytesIO
10
 
11
- # Load spaCy model for Named Entity Recognition and general NLP tasks
12
  nlp = spacy.load("en_core_web_sm")
 
13
 
14
- # File upload handler
15
- uploaded_file = st.file_uploader("Upload a document (DOCX, PDF, Excel)", type=["docx", "pdf", "xlsx"])
 
16
 
 
 
 
17
  def extract_text_from_docx(uploaded_file):
18
  doc = docx.Document(uploaded_file)
19
- text = "\n".join([para.text for para in doc.paragraphs])
20
- return text
21
 
22
  def extract_text_from_pdf(uploaded_file):
23
  reader = PyPDF2.PdfReader(uploaded_file)
24
- text = ""
25
- for page in reader.pages:
26
- text += page.extract_text()
27
- return text
28
 
29
  def extract_text_from_excel(uploaded_file):
30
  df = pd.read_excel(uploaded_file)
31
- text = df.to_string() # Combine all data into a single string
32
- return text
 
 
 
 
 
 
33
 
34
- # AI-powered document analysis functions
35
  def analyze_text(text):
36
  doc = nlp(text)
37
- named_entities = [(ent.text, ent.label_) for ent in doc.ents]
38
-
39
- # Simple sentiment analysis (for demonstration)
40
  sentiment = "Positive" if "good" in text.lower() else "Negative"
41
-
42
- return named_entities, sentiment
43
 
44
  def extract_keywords(text, top_n=10):
45
- # Simple word count to extract top N frequent words (excluding stop words)
46
- words = [word.lower() for word in text.split() if len(word) > 3]
47
  word_count = Counter(words)
48
- most_common = word_count.most_common(top_n)
49
- return most_common
50
 
51
  def plot_keywords(keywords):
52
  words, counts = zip(*keywords)
53
  fig, ax = plt.subplots()
54
  ax.barh(words, counts)
55
  ax.set_xlabel('Frequency')
56
- ax.set_ylabel('Keywords')
57
- plt.title("Top Keywords")
58
  st.pyplot(fig)
59
 
60
- # Multilingual support - translation
61
- def translate_text(text):
62
- translator = Translator()
63
- translated = translator.translate(text, src='auto', dest='en')
64
- return translated.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- # Display the UI components
67
- if uploaded_file is not None:
68
- file_extension = uploaded_file.name.split('.')[-1].lower()
69
-
70
- if file_extension == 'docx':
71
- text = extract_text_from_docx(uploaded_file)
72
- elif file_extension == 'pdf':
73
- text = extract_text_from_pdf(uploaded_file)
74
- elif file_extension == 'xlsx':
75
- text = extract_text_from_excel(uploaded_file)
76
-
77
- st.write("Document Text Preview:")
78
- st.text_area("Extracted Text", text, height=200)
79
-
80
- # Translate the document text to English if needed
81
- translated_text = translate_text(text)
82
- st.write("Translated Text (English):")
83
- st.text_area("Translated Text", translated_text, height=200)
84
-
85
- # Perform AI analysis on the document text
86
- named_entities, sentiment = analyze_text(translated_text)
87
- st.write("Named Entities Extracted:")
88
- st.write(named_entities)
89
- st.write(f"Sentiment: {sentiment}")
90
-
91
- # Keyword extraction and visualization
92
  keywords = extract_keywords(translated_text)
93
- st.write("Top Keywords:")
94
  st.write(keywords)
95
  plot_keywords(keywords)
96
 
97
- # Manual text input for captions
98
- user_input = st.text_area("Manually Input Captions")
99
-
100
- if user_input:
101
- translated_input = translate_text(user_input)
102
- st.write("Translated Input Text (English):")
103
- st.text_area("Translated Input", translated_input, height=200)
104
-
105
- # AI analysis on the manual input
106
- named_entities_input, sentiment_input = analyze_text(translated_input)
107
- st.write("Named Entities in Input Text:")
108
- st.write(named_entities_input)
109
- st.write(f"Sentiment: {sentiment_input}")
110
-
111
- # Keyword extraction for manual input
112
- keywords_input = extract_keywords(translated_input)
113
- st.write("Top Keywords in Input:")
114
- st.write(keywords_input)
115
- plot_keywords(keywords_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import matplotlib.pyplot as plt
7
  from googletrans import Translator
8
  import spacy
 
9
 
10
# Load English NLP model
# (en_core_web_sm must be downloaded beforehand: `python -m spacy download en_core_web_sm`)
nlp = spacy.load("en_core_web_sm")
# Single shared googletrans client, reused by translate_text() below.
translator = Translator()

# Streamlit page chrome — set_page_config must be the first st.* call in the script.
st.set_page_config(page_title="AI NVivo Coding App", layout="wide")
st.title("🧠 AI-Powered NVivo App (Text Analysis + Coding)")
st.markdown("Upload files or input captions manually. Analyze & code your qualitative data automatically!")
17
 
18
+ # ----------------------------
19
+ # Text Extraction Functions
20
+ # ----------------------------
21
def extract_text_from_docx(uploaded_file):
    """Return the full text of a DOCX file, one paragraph per line."""
    document = docx.Document(uploaded_file)
    paragraph_texts = (para.text for para in document.paragraphs)
    return "\n".join(paragraph_texts)
 
24
 
25
def extract_text_from_pdf(uploaded_file):
    """Extract all text from a PDF file-like object.

    Args:
        uploaded_file: Binary file-like object containing PDF data.

    Returns:
        str: Concatenated text of every page, in page order.
    """
    reader = PyPDF2.PdfReader(uploaded_file)
    # extract_text() returns None for image-only/scanned pages; without the
    # `or ""` guard the join raises TypeError on such documents.
    return "".join(page.extract_text() or "" for page in reader.pages)
 
 
 
28
 
29
def extract_text_from_excel(uploaded_file):
    """Return spreadsheet contents as plain text, one row per line.

    Every cell is stringified and cells within a row are joined by a
    single space.
    """
    frame = pd.read_excel(uploaded_file)
    row_strings = frame.astype(str).apply(" ".join, axis=1)
    return "\n".join(row_strings)
32
+
33
+ # ----------------------------
34
+ # NLP + AI Analysis
35
+ # ----------------------------
36
def translate_text(text):
    """Return *text* translated into English (source language auto-detected)."""
    # Network call through the module-level googletrans Translator.
    return translator.translate(text, src='auto', dest='en').text
39
 
 
40
def analyze_text(text):
    """Run spaCy NER over *text* and compute a toy keyword-based sentiment.

    Returns:
        tuple: (entities, sentiment) where entities is a list of
        (entity_text, entity_label) pairs and sentiment is "Positive"
        if the substring "good" occurs (case-insensitive), else "Negative".
    """
    parsed = nlp(text)
    entities = []
    for ent in parsed.ents:
        entities.append((ent.text, ent.label_))
    # Deliberately naive demo sentiment: presence of "good" anywhere.
    if "good" in text.lower():
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    return entities, sentiment
 
45
 
46
def extract_keywords(text, top_n=10):
    """Return the *top_n* most frequent words longer than 3 letters.

    Tokens are lowercased and stripped of surrounding punctuation, so
    "Rights," and "rights" count as the same keyword. Previously a token
    with trailing punctuation failed ``isalpha()`` and was silently
    dropped instead of counted.

    Args:
        text: Input text to mine for keywords.
        top_n: Number of (word, count) pairs to return (default 10).

    Returns:
        list[tuple[str, int]]: Most common words, highest count first.
        Empty list for empty input.
    """
    import string  # local import: punctuation table for token cleanup

    cleaned = (word.lower().strip(string.punctuation) for word in text.split())
    words = [w for w in cleaned if len(w) > 3 and w.isalpha()]
    return Counter(words).most_common(top_n)
 
50
 
51
def plot_keywords(keywords):
    """Render a horizontal bar chart of keyword frequencies in Streamlit.

    Args:
        keywords: List of (word, count) tuples, e.g. from extract_keywords().
    """
    # Guard: ``words, counts = zip(*[])`` raises ValueError, so report an
    # empty keyword list instead of crashing the app.
    if not keywords:
        st.info("No keywords to plot.")
        return
    words, counts = zip(*keywords)
    fig, ax = plt.subplots()
    ax.barh(words, counts)
    ax.set_xlabel('Frequency')
    ax.set_title("Top Keywords")
    st.pyplot(fig)
58
 
59
def auto_code_text(text):
    """Assign coarse qualitative-coding themes to *text* by keyword match.

    A theme applies when any of its keywords occurs as a whole word,
    case-insensitive. Whole-word matching fixes the previous substring
    check, where e.g. the "all" keyword fired on "ball", "really", etc.

    Args:
        text: Text to code.

    Returns:
        list[str]: Matching theme names in declaration order, or
        ["uncategorized"] if no theme matches.
    """
    import re  # local import: tokenizer for whole-word matching

    themes = {
        "activism": ["march", "protest", "rights", "resist"],
        "intersectionality": ["women", "lgbt", "race", "class"],
        "call_to_action": ["join", "support", "attend", "speak"],
        "strategic_framing": ["narrative", "frame", "message"],
        "inclusivity": ["diverse", "all", "together", "inclusion"],
    }
    # Tokenize once; set intersection makes each theme check O(#keywords).
    tokens = set(re.findall(r"[a-z]+", text.lower()))
    codes = [code for code, keywords in themes.items() if tokens & set(keywords)]
    return codes if codes else ["uncategorized"]
72
+
73
+ # ----------------------------
74
+ # File Upload
75
+ # ----------------------------
76
# ----------------------------
# File Upload
# ----------------------------
uploaded_file = st.file_uploader("📂 Upload a file", type=["docx", "pdf", "xlsx"])

if uploaded_file:
    # Normalise the extension: "Report.DOCX" must still be recognised.
    # Without .lower() an upper-case extension falls through every branch
    # and raw_text is unbound (NameError) below.
    ext = uploaded_file.name.split('.')[-1].lower()
    if ext == 'docx':
        raw_text = extract_text_from_docx(uploaded_file)
    elif ext == 'pdf':
        raw_text = extract_text_from_pdf(uploaded_file)
    elif ext == 'xlsx':
        raw_text = extract_text_from_excel(uploaded_file)
    else:
        # Defensive: file_uploader already filters types, but guard anyway
        # so raw_text can never be referenced unbound.
        st.error(f"Unsupported file type: .{ext}")
        st.stop()

    st.subheader("📄 Extracted Text")
    st.text_area("Raw Text", raw_text, height=150)

    # Translate to English so NER/keywords/coding run on one language.
    translated_text = translate_text(raw_text)
    st.subheader("🌍 Translated to English")
    st.text_area("Translated Text", translated_text, height=150)

    entities, sentiment = analyze_text(translated_text)
    st.subheader("🧠 Named Entities")
    st.write(entities)
    st.markdown(f"**Sentiment:** {sentiment}")

    keywords = extract_keywords(translated_text)
    st.subheader("🔑 Top Keywords")
    st.write(keywords)
    plot_keywords(keywords)

    st.subheader("🏷️ Auto Codes for Full Document")
    codes = auto_code_text(translated_text)
    st.write(f"Detected Codes: {', '.join(codes)}")
107
+
108
+ # ----------------------------
109
+ # Manual Input
110
+ # ----------------------------
111
# ----------------------------
# Manual Input
# ----------------------------
st.markdown("---")
st.subheader("✍️ Manually Enter Captions")
manual_input = st.text_area("Enter caption text here...", height=120)

# Everything below only runs once the user has typed a caption.
if manual_input:
    # Same pipeline as the uploaded-file path: translate → NER/sentiment
    # → keywords → auto-coding.
    translated = translate_text(manual_input)
    st.write("**Translated:**", translated)

    entities, sentiment = analyze_text(translated)
    st.write("**Entities:**", entities)
    st.write("**Sentiment:**", sentiment)

    keywords = extract_keywords(translated)
    st.write("**Keywords:**", keywords)
    plot_keywords(keywords)

    codes = auto_code_text(translated)
    st.success(f"Auto-Coded Themes: {', '.join(codes)}")

    # Optional user-supplied code. NOTE(review): on the Streamlit rerun
    # triggered by entering a tag, the st.success above still shows only
    # the auto codes; the manual tag appears in the final dict below.
    manual_tag = st.text_input("➕ Manually Add a Code (Optional)")
    if manual_tag:
        codes.append(manual_tag)

    # Show final result
    st.write("📌 Final Coding for Caption:")
    st.write({
        "caption": manual_input,
        "translated": translated,
        "codes": codes
    })