ahm14 committed on
Commit
d0f1307
·
verified ·
1 Parent(s): 758d15e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from docx import Document
3
+ from transformers import pipeline
4
+ from langdetect import detect
5
+ import spacy
6
+
7
# Streamlit re-executes this whole script on every widget interaction, so the
# model loaders are wrapped in st.cache_resource: each model is built once per
# server process and the same object is reused on later reruns.


@st.cache_resource
def _load_spacy_model():
    """Load the small English spaCy pipeline used for entity extraction."""
    return spacy.load("en_core_web_sm")


@st.cache_resource
def _load_summarizer():
    """Build the Hugging Face summarization pipeline backed by Llama 3 8B."""
    # NOTE(review): Meta-Llama-3-8B is a decoder-only checkpoint, while the
    # "summarization" task is normally served by encoder-decoder models --
    # confirm this task/model pairing loads and behaves as intended.
    return pipeline("summarization", model="meta-llama/Meta-Llama-3-8B")


@st.cache_resource
def _load_recommender():
    """Build the text-generation pipeline backed by Gemma 2B-IT."""
    return pipeline("text-generation", model="google/gemma-2b-it")


# Load NLP models
nlp = _load_spacy_model()

# Load Llama 3 summarization model
llama_summarizer = _load_summarizer()

# Load Gemma 2B-IT for recommendations (the checkpoint is google/gemma-2b-it)
gemma_recommender = _load_recommender()
15
+
16
# Pull the readable text out of an uploaded Word document
def extract_text_from_docx(docx_file):
    """Return the non-blank paragraphs of a DOCX file joined by newlines.

    Args:
        docx_file: Whatever ``docx.Document`` accepts; the app passes the
            object returned by the Streamlit file uploader.

    Returns:
        A single string with one paragraph per line. Paragraphs whose text is
        empty or whitespace-only are left out.
    """
    kept_paragraphs = []
    for paragraph in Document(docx_file).paragraphs:
        if paragraph.text.strip():
            kept_paragraphs.append(paragraph.text)
    return "\n".join(kept_paragraphs)
21
+
22
# Function to detect document language
def detect_language(text):
    """Return the language code that langdetect assigns to ``text``.

    Args:
        text: Plain text to classify.

    Returns:
        The code returned by ``langdetect.detect``, or the string
        ``"unknown"`` when ``text`` is empty/whitespace-only or langdetect
        cannot classify it.
    """
    # Blank input carries nothing to classify; answer without calling the
    # detector, which raises on such input.
    if not text or not text.strip():
        return "unknown"

    # Imported here so the block does not depend on a file-level import.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        return detect(text)
    except LangDetectException:
        # Raised for text with no usable features (e.g. digits/punctuation).
        return "unknown"
25
+
26
# Function to extract metadata
def extract_metadata(text):
    """Compute simple metadata for a document.

    Args:
        text: Full document text.

    Returns:
        A dict with two keys:
        ``"Word Count"`` -- number of whitespace-separated tokens in ``text``.
        ``"Entities"`` -- mapping from each spaCy entity label to the list of
        distinct entity texts carrying that label, in order of first
        appearance.
    """
    doc = nlp(text)

    # Group by label. A flat {label: text} mapping would overwrite earlier
    # entities, leaving only the last one found for each label.
    entities = {}
    for ent in doc.ents:
        mentions = entities.setdefault(ent.label_, [])
        if ent.text not in mentions:
            mentions.append(ent.text)

    return {
        "Word Count": len(text.split()),
        "Entities": entities,
    }
35
+
36
# Produce the abstract shown in the UI via the Llama 3 summarization pipeline
def generate_summary(text):
    """Summarize ``text`` with the module-level ``llama_summarizer`` pipeline.

    Args:
        text: Document text to condense.

    Returns:
        The ``summary_text`` field of the first result the pipeline returns.
        Generation is deterministic (sampling disabled) and bounded to
        between 50 and 200 tokens by the pipeline arguments.
    """
    generation_options = {"max_length": 200, "min_length": 50, "do_sample": False}
    first_result = llama_summarizer(text, **generation_options)[0]
    return first_result["summary_text"]
40
+
41
# Function to generate recommendations using Gemma 2B-IT
def generate_recommendations(text):
    """Ask the module-level ``gemma_recommender`` pipeline for recommendations.

    Args:
        text: Document text that is embedded verbatim in the prompt.

    Returns:
        The generated continuation only (the prompt is not echoed back), with
        surrounding whitespace stripped.
    """
    prompt = f"Provide three key recommendations based on the following document:\n{text}\n\nRecommendations:"
    recommendations = gemma_recommender(
        prompt,
        # Budget for newly generated tokens only. max_length would also count
        # the prompt, which contains the whole document and can exceed it.
        max_new_tokens=300,
        num_return_sequences=1,
        do_sample=False,
        # Without this the result starts with the full prompt, so the UI
        # would show the document again ahead of the recommendations.
        return_full_text=False,
    )
    return recommendations[0]['generated_text'].strip()
46
+
47
# Streamlit UI: upload a DOCX, show its text, language and metadata, and
# generate the abstract and recommendations on demand.
st.title("📄 AI-Powered Multi-Language Document Analyzer")

uploaded_file = st.file_uploader("Upload a Word Document", type=["docx"])

if uploaded_file:
    st.success("File uploaded successfully!")

    # Extract text
    doc_text = extract_text_from_docx(uploaded_file)

    if not doc_text.strip():
        # Nothing to analyze: skip language detection and the models rather
        # than feeding them an empty string.
        st.warning("No text could be extracted from this document.")
    else:
        # Detect language
        language = detect_language(doc_text)

        # Extract metadata
        metadata = extract_metadata(doc_text)

        st.subheader("Extracted Text:")
        st.text_area("Document Content", doc_text, height=250)

        st.subheader("🗣️ Detected Language:")
        st.write(language)

        st.subheader("📊 Metadata:")
        st.json(metadata)

        # The model calls are slow, so they run only when explicitly requested.
        if st.button("Generate Abstract & Recommendations"):
            with st.spinner("Analyzing..."):
                summary = generate_summary(doc_text)
                recommendations = generate_recommendations(doc_text)

            st.subheader("📌 Abstract (Summary) - Llama 3")
            st.write(summary)

            # Heading names the checkpoint actually loaded (google/gemma-2b-it).
            st.subheader("✅ Recommendations - Gemma 2B-IT")
            st.write(recommendations)