Spaces:

ahm14
/

Test

Build error

App Files Files Community

ahm14 committed on Mar 15, 2025

Commit

d0f1307

verified ·

1 Parent(s): 758d15e

Create app.py

Browse files

Files changed (1) hide show

app.py +82 -0

app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import streamlit as st
+from docx import Document
+from transformers import pipeline
+from langdetect import detect
+import spacy
+# Load NLP models
+nlp = spacy.load("en_core_web_sm")
+# Load Llama 3 summarization model
+llama_summarizer = pipeline("summarization", model="meta-llama/Meta-Llama-3-8B")
+# Load Gemma 2-9B-IT for recommendations
+gemma_recommender = pipeline("text-generation", model="google/gemma-2b-it")
+# Function to extract text from a DOCX file
+def extract_text_from_docx(docx_file):
+    doc = Document(docx_file)
+    text = "\n".join([para.text for para in doc.paragraphs if para.text.strip()])
+    return text
+# Function to detect document language
+def detect_language(text):
+    return detect(text)
+# Function to extract metadata
+def extract_metadata(text):
+    doc = nlp(text)
+    word_count = len(text.split())
+    entities = {ent.label_: ent.text for ent in doc.ents}
+    return {
+        "Word Count": word_count,
+        "Entities": entities
+    }
+# Function to generate abstract (summary) using Llama 3
+def generate_summary(text):
+    summary = llama_summarizer(text, max_length=200, min_length=50, do_sample=False)
+    return summary[0]['summary_text']
+# Function to generate recommendations using Gemma 2-9B-IT
+def generate_recommendations(text):
+    prompt = f"Provide three key recommendations based on the following document:\n{text}\n\nRecommendations:"
+    recommendations = gemma_recommender(prompt, max_length=300, num_return_sequences=1, do_sample=False)
+    return recommendations[0]['generated_text']
+# Streamlit UI: upload a DOCX file, show its text, detected language and
+# metadata, then generate a summary and recommendations on request.
+st.title("📄 AI-Powered Multi-Language Document Analyzer")
+# Only .docx files are accepted by the uploader.
+uploaded_file = st.file_uploader("Upload a Word Document", type=["docx"])
+# Everything below runs only once a file has been provided.
+if uploaded_file:
+    st.success("File uploaded successfully!")
+    # Extract text
+    doc_text = extract_text_from_docx(uploaded_file)
+    # Detect language
+    language = detect_language(doc_text)
+    # Extract metadata
+    metadata = extract_metadata(doc_text)
+    # Show the extracted text, the detected language and the metadata.
+    st.subheader("Extracted Text:")
+    st.text_area("Document Content", doc_text, height=250)
+    st.subheader("🗣️ Detected Language:")
+    st.write(language)
+    st.subheader("📊 Metadata:")
+    st.json(metadata)
+    # The model calls run only when the button is pressed.
+    if st.button("Generate Abstract & Recommendations"):
+        # Both generation calls run inside the spinner.
+        with st.spinner("Analyzing..."):
+            summary = generate_summary(doc_text)
+            recommendations = generate_recommendations(doc_text)
+        st.subheader("📌 Abstract (Summary) - Llama 3")
+        st.write(summary)
+        # NOTE(review): this heading says "Gemma 2-9B-IT", but the pipeline in
+        # this file is created with model="google/gemma-2b-it"; confirm which
+        # model is intended.
+        st.subheader("✅ Recommendations - Gemma 2-9B-IT")
+        st.write(recommendations)