Spaces:

IqraFatima
/

ZeroPhish

Sleeping

App Files Files Community

IqraFatima committed on Jul 27, 2025

Commit

d5d7753

verified ·

1 Parent(s): fd0b808

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -455

app.py CHANGED Viewed

@@ -1,199 +1,10 @@
-# # app.py v2
-# import os
-# import re
-# import fitz  # PyMuPDF
-# import tempfile
-# from datetime import datetime
-# import base64
-# from gtts import gTTS
-# import streamlit as st
-# from transformers.pipelines import pipeline
-# from groq import Groq
-# # ✅ Hugging Face and GROQ secrets loaded via Hugging Face Spaces Secrets interface
-# # ⛳ Access secrets securely from environment variables
-# GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-# HF_TOKEN = os.getenv("HF_TOKEN")
-# KAGGLE_USERNAME = os.getenv("KAGGLE_USERNAME")
-# KAGGLE_KEY = os.getenv("KAGGLE_KEY")
-# # ✅ Validate secrets
-# if not all([GROQ_API_KEY, HF_TOKEN, KAGGLE_USERNAME, KAGGLE_KEY]):
-#     st.error("❌ One or more required API keys are missing from the environment.")
-#     st.stop()
-# # ✅ Initialize Groq client
-# client = Groq(api_key=GROQ_API_KEY)
-# # ✅ Load phishing detection pipeline from Hugging Face
-# phishing_pipe = pipeline(
-#     "text-classification",
-#     model="ealvaradob/bert-finetuned-phishing",
-#     token=HF_TOKEN
-# )
-# # ✅ Language and role options
-# language_choices = ["English", "Urdu", "French"]
-# role_choices = ["Admin", "Procurement", "Logistics"]
-# # ✅ Glossary terms
-# GLOSSARY = {
-#     "phishing": "Phishing is a scam where attackers trick you into revealing personal information.",
-#     "malware": "Malicious software designed to harm or exploit systems.",
-#     "spam": "Unwanted or unsolicited messages.",
-#     "tone": "The emotional character of the message."
-# }
-# # ✅ Translations (demo dictionary-based)
-# TRANSLATIONS = {
-#     "Phishing": {"Urdu": "فشنگ", "French": "Hameçonnage"},
-#     "Spam": {"Urdu": "سپیم", "French": "Courrier indésirable"},
-#     "Malware": {"Urdu": "میلویئر", "French": "Logiciel malveillant"},
-#     "Safe": {"Urdu": "محفوظ", "French": "Sûr"}
-# }
-# # =======================
-# # Streamlit UI
-# # =======================
-# st.set_page_config(page_title="ZeroPhish Gate", layout="wide")
-# st.title("🛡️ ZeroPhish Gate")
-# st.markdown("AI-powered phishing message detection and explanation.")
-# # Input fields
-# col1, col2 = st.columns([3, 1])
-# with col1:
-#     text_input = st.text_area("✉️ Paste Suspicious Message", height=200)
-#     uploaded_file = st.file_uploader("📄 Upload PDF/TXT (optional)", type=["pdf", "txt"])
-# with col2:
-#     language = st.selectbox("🌐 Preferred Language", language_choices)
-#     role = st.selectbox("🧑‍💼 Your Role", role_choices)
-# analyze_btn = st.button("🔍 Analyze with AI")
-# # =======================
-# # Function Definitions
-# # =======================
-# def extract_text_from_file(file):
-#     if file is None:
-#         return ""
-#     ext = file.name.split(".")[-1].lower()
-#     if ext == "pdf":
-#         doc = fitz.open(stream=file.read(), filetype="pdf")
-#         return "\n".join(page.get_text() for page in doc)
-#     elif ext == "txt":
-#         return file.read().decode("utf-8")
-#     return ""
-# def analyze_with_huggingface(text):
-#     try:
-#         result = phishing_pipe(text)
-#         label = result[0]['label']
-#         confidence = round(result[0]['score'] * 100, 2)
-#         threat_type = {
-#             "PHISHING": "Phishing",
-#             "SPAM": "Spam",
-#             "MALWARE": "Malware",
-#             "LEGITIMATE": "Safe"
-#         }.get(label.upper(), "Unknown")
-#         return label, confidence, threat_type
-#     except Exception as e:
-#         return "Error", 0, f"Error: {e}"
-# def semantic_analysis(text):
-#     response = client.chat.completions.create(
-#         model="llama3-8b-8192",
-#         messages=[
-#             {"role": "system", "content": "You are a cybersecurity assistant."},
-#             {"role": "user", "content": f"Please explain this message in professional tone for a {role} in {language}. Do not end with questions.\n\nMessage:\n{text}"}
-#         ]
-#     )
-#     return response.choices[0].message.content
-# def translate_label(threat_type):
-#     return TRANSLATIONS.get(threat_type, {}).get(language, threat_type)
-# def text_to_speech(text):
-#     tts = gTTS(text=text, lang='en')
-#     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
-#         tts.save(fp.name)
-#         return fp.name
-# def create_report(label, score, threat_type, explanation, text):
-#     ts = datetime.now().strftime("%Y%m%d_%H%M%S")
-#     filename = f"Zerophish_Report_{ts}.txt"
-#     report = f"""
-# 🔍 AI Threat Detection Report
-# Input Message:
-# {text}
-# Prediction: {label}
-# Threat Type: {threat_type}
-# Confidence: {score}%
-# ---
-# 🧠 Explanation:
-# {explanation}
-# """
-#     with open(filename, "w") as f:
-#         f.write(report)
-#     return filename
-# # =======================
-# # Run Analysis
-# # =======================
-# if analyze_btn:
-#     combined_text = text_input
-#     if uploaded_file:
-#         extracted = extract_text_from_file(uploaded_file)
-#         combined_text += "\n" + extracted
-#     if not combined_text.strip():
-#         st.warning("⚠️ Please enter some text or upload a file to analyze.")
-#     else:
-#         label, score, threat_type = analyze_with_huggingface(combined_text)
-#         translated_threat = translate_label(threat_type)
-#         st.subheader("🔍 AI Threat Detection Result")
-#         st.markdown(f"**Prediction:** {label}")
-#         st.markdown(f"**Threat Type:** {threat_type} ({translated_threat})")
-#         st.markdown(f"**Confidence:** {score}%")
-#         explanation = ""
-#         if threat_type.lower() != "safe":
-#             with st.expander("🧠 Semantic Reanalysis by LLaMA"):
-#                 explanation = semantic_analysis(combined_text)
-#                 st.write(explanation)
-#                 if st.button("🔊 Play Explanation as Audio"):
-#                     audio_path = text_to_speech(explanation)
-#                     with open(audio_path, "rb") as f:
-#                         st.audio(f.read(), format="audio/mp3")
-#         with st.expander("📜 Glossary Help"):
-#             for term, definition in GLOSSARY.items():
-#                 st.markdown(f"**{term.capitalize()}**: {definition}")
-#         if explanation:
-#             report_path = create_report(label, score, threat_type, explanation, combined_text)
-#             with open(report_path, "rb") as f:
-#                 b64 = base64.b64encode(f.read()).decode()
-#                 href = f'<a href="data:file/txt;base64,{b64}" download="{report_path}">📄 Download Full Report</a>'
-#                 st.markdown(href, unsafe_allow_html=True)
-#app v3
 import os
 import re
 import fitz  # PyMuPDF
 import tempfile
 import base64
 from datetime import datetime
 from gtts import gTTS
 import streamlit as st
 from transformers import pipeline
@@ -242,9 +53,11 @@ TRANSLATIONS = {
     "Safe": {"Urdu": "محفوظ", "French": "Sûr"}
 }
-# ✅ In-memory history
 if "history" not in st.session_state:
     st.session_state.history = []
 # =======================
 # Streamlit UI
@@ -255,13 +68,38 @@ st.markdown("""
     <style>
     .report-container {
         border: 1px solid #ddd;
-        padding: 1rem;
-        border-radius: 10px;
-        background-color: #f9f9f9;
     }
     .highlight {
         font-weight: bold;
         color: #d9534f;
     }
     </style>
 """, unsafe_allow_html=True)
@@ -326,11 +164,45 @@ def semantic_analysis(text):
 def translate_label(threat_type):
     """Return threat_type translated into the module-level `language`, or threat_type itself when TRANSLATIONS has no entry."""
     per_language = TRANSLATIONS.get(threat_type, {})
     return per_language.get(language, threat_type)
-def text_to_speech(text):
-    tts = gTTS(text=text, lang='en')
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
-        tts.save(fp.name)
-        return fp.name
 def render_history():
     with st.expander("🕓 View Analysis History"):
@@ -347,7 +219,9 @@ def render_history():
 # =======================
 if clear_btn:
     st.session_state.history.clear()
-    st.success("✅ History cleared!")
 if analyze_btn:
     combined_text = text_input
@@ -370,14 +244,43 @@ if analyze_btn:
         summary = ""
         if threat_type.lower() != "safe":
-            with st.expander("🧠 Semantic Reanalysis by LLaMA"):
-                summary = semantic_analysis(combined_text)
                 st.write(summary)
-                if st.button("🔊 Play Explanation as Audio"):
-                    audio_path = text_to_speech(summary)
-                    with open(audio_path, "rb") as f:
-                        st.audio(f.read(), format="audio/mp3")
         # Save history
         st.session_state.history.append({
@@ -392,252 +295,3 @@ if analyze_btn:
                 st.markdown(f"**{term.capitalize()}**: {definition}")
 render_history()
-#App v 3
-# app.py
-# # app.py
-# import os
-# import re
-# import fitz  # PyMuPDF
-# import tempfile
-# import base64
-# from datetime import datetime
-# from gtts import gTTS
-# import streamlit as st
-# from transformers.pipelines import pipeline
-# from groq import Groq
-# # ✅ Hugging Face and GROQ secrets loaded via Hugging Face Spaces Secrets interface
-# # ⛳ Access secrets securely from environment variables
-# GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-# HF_TOKEN = os.getenv("HF_TOKEN")
-# KAGGLE_USERNAME = os.getenv("KAGGLE_USERNAME")
-# KAGGLE_KEY = os.getenv("KAGGLE_KEY")
-# # ✅ Validate secrets
-# if not all([GROQ_API_KEY, HF_TOKEN, KAGGLE_USERNAME, KAGGLE_KEY]):
-#     st.error("❌ One or more required API keys are missing from the environment.")
-#     st.stop()
-# # ✅ Initialize Groq client
-# client = Groq(api_key=GROQ_API_KEY)
-# # ✅ Load phishing detection pipeline from Hugging Face
-# phishing_pipe = pipeline(
-#     "text-classification",
-#     model="ealvaradob/bert-finetuned-phishing",
-#     token=HF_TOKEN
-# )
-# # ✅ Language and role options
-# language_choices = ["English", "Urdu", "French"]
-# role_choices = ["Admin", "Procurement", "Logistics"]
-# # ✅ Glossary terms
-# GLOSSARY = {
-#     "phishing": "Phishing is a scam where attackers trick you into revealing personal information.",
-#     "malware": "Malicious software designed to harm or exploit systems.",
-#     "spam": "Unwanted or unsolicited messages.",
-#     "tone": "The emotional character of the message."
-# }
-# # ✅ Translations (demo dictionary-based)
-# TRANSLATIONS = {
-#     "Phishing": {"Urdu": "فشنگ", "French": "Hameçonnage"},
-#     "Spam": {"Urdu": "سپیم", "French": "Courrier indésirable"},
-#     "Malware": {"Urdu": "میلویئر", "French": "Logiciel malveillant"},
-#     "Safe": {"Urdu": "محفوظ", "French": "Sûr"}
-# }
-# # ✅ In-memory history
-# if "history" not in st.session_state:
-#     st.session_state.history = []
-# # =======================
-# # Streamlit UI
-# # =======================
-# st.set_page_config(page_title="ZeroPhish Gate", layout="wide")
-# st.markdown("""
-#     <style>
-#     .report-container {
-#         border: 1px solid #ddd;
-#         padding: 1rem;
-#         border-radius: 10px;
-#         background-color: #f9f9f9;
-#     }
-#     .highlight {
-#         font-weight: bold;
-#         color: #d9534f;
-#     }
-#     </style>
-# """, unsafe_allow_html=True)
-# st.title("🛡️ ZeroPhish Gate")
-# st.markdown("AI-powered phishing message detection and explanation.")
-# # Input fields
-# col1, col2 = st.columns([3, 1])
-# with col1:
-#     text_input = st.text_area("✉️ Paste Suspicious Message", height=200)
-#     uploaded_file = st.file_uploader("📄 Upload PDF/TXT (optional)", type=["pdf", "txt"])
-# with col2:
-#     language = st.selectbox("🌐 Preferred Language", language_choices)
-#     role = st.selectbox("🧑‍💼 Your Role", role_choices)
-# analyze_btn = st.button("🔍 Analyze with AI")
-# clear_btn = st.button("🗑️ Clear History")
-# # =======================
-# # Function Definitions
-# # =======================
-# def extract_text_from_file(file):
-#     if file is None:
-#         return ""
-#     ext = file.name.split(".")[-1].lower()
-#     if ext == "pdf":
-#         doc = fitz.open(stream=file.read(), filetype="pdf")
-#         return "\n".join(page.get_text() for page in doc)
-#     elif ext == "txt":
-#         return file.read().decode("utf-8")
-#     return ""
-# def analyze_with_huggingface(text):
-#     try:
-#         result = phishing_pipe(text)
-#         label = result[0]['label']
-#         confidence = round(result[0]['score'] * 100, 2)
-#         threat_type = {
-#             "PHISHING": "Phishing",
-#             "SPAM": "Spam",
-#             "MALWARE": "Malware",
-#             "LEGITIMATE": "Safe"
-#         }.get(label.upper(), "Unknown")
-#         return label, confidence, threat_type
-#     except Exception as e:
-#         return "Error", 0, f"Error: {e}"
-# def semantic_analysis(text):
-#     response = client.chat.completions.create(
-#         model="llama3-8b-8192",
-#         messages=[
-#             {"role": "system", "content": "You are a cybersecurity assistant."},
-#             {"role": "user", "content": f"Explain this suspicious message for a {role} in {language} without ending in questions:\n{text}"}
-#         ]
-#     )
-#     raw = response.choices[0].message.content
-#     clean = re.sub(r"Is there anything else you'd like.*", "", raw, flags=re.I).strip()
-#     return clean
-# def translate_label(threat_type):
-#     return TRANSLATIONS.get(threat_type, {}).get(language, threat_type)
-# def text_to_speech(text):
-#     try:
-#         tts = gTTS(text=text, lang='en')
-#         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
-#             tts.save(fp.name)
-#             audio_path = fp.name
-#         audio_file = open(audio_path, "rb")
-#         audio_bytes = audio_file.read()
-#         st.audio(audio_bytes, format="audio/mp3")
-#         audio_file.close()
-#         os.remove(audio_path)
-#     except Exception as e:
-#         st.error(f"❌ Audio playback error: {e}")
-# def create_report(label, score, threat_type, explanation, text):
-#     ts = datetime.now().strftime("%Y%m%d_%H%M%S")
-#     filename = f"Zerophish_Report_{ts}.txt"
-#     report = f"""
-# 🔍 AI Threat Detection Report
-# Input Message:
-# {text}
-# Prediction: {label}
-# Threat Type: {threat_type}
-# Confidence: {score}%
-# ---
-# 🧠 Explanation:
-# {explanation}
-# """
-#     with open(filename, "w") as f:
-#         f.write(report)
-#     return filename
-# def render_history():
-#     with st.expander("🕓 View Analysis History", expanded=True):
-#         for i, record in enumerate(reversed(st.session_state.history)):
-#             with st.container():
-#                 st.markdown(f"**🔢 Entry #{len(st.session_state.history) - i}**")
-#                 st.markdown(f"**📝 Input:** {record['input'][:100]}...")
-#                 st.markdown(f"**🔐 Type:** {record['threat']} | **📊 Confidence:** {record['score']}%")
-#                 st.markdown(f"**📖 Summary:** {record['summary'][:200]}...")
-#                 st.markdown("---")
-# # =======================
-# # Run Analysis
-# # =======================
-# if clear_btn:
-#     st.session_state.history.clear()
-#     st.success("✅ History cleared!")
-# if analyze_btn:
-#     combined_text = text_input
-#     if uploaded_file:
-#         extracted = extract_text_from_file(uploaded_file)
-#         combined_text += "\n" + extracted
-#     if not combined_text.strip():
-#         st.warning("⚠️ Please enter some text or upload a file to analyze.")
-#     else:
-#         label, score, threat_type = analyze_with_huggingface(combined_text)
-#         translated_threat = translate_label(threat_type)
-#         st.subheader("🔍 AI Threat Detection Result")
-#         st.markdown(f"<div class='report-container'>"
-#                     f"<p><span class='highlight'>Prediction:</span> {label}</p>"
-#                     f"<p><span class='highlight'>Threat Type:</span> {threat_type} ({translated_threat})</p>"
-#                     f"<p><span class='highlight'>Confidence:</span> {score}%</p>"
-#                     f"</div>", unsafe_allow_html=True)
-#         summary = ""
-#         if threat_type.lower() != "safe":
-#             with st.expander("🧠 Semantic Reanalysis by LLaMA"):
-#                 summary = semantic_analysis(combined_text)
-#                 st.write(summary)
-#                 if st.button("🔊 Play Explanation as Audio"):
-#                     text_to_speech(summary)
-#                 if st.button("📤 Send Report to IT"):
-#                     st.success("📨 Report sent to IT successfully.")
-#         # Save history
-#         st.session_state.history.append({
-#             "input": combined_text,
-#             "threat": threat_type,
-#             "score": score,
-#             "summary": summary
-#         })
-#         # Generate and offer download link
-#         if summary:
-#             report_path = create_report(label, score, threat_type, summary, combined_text)
-#             with open(report_path, "rb") as f:
-#                 b64 = base64.b64encode(f.read()).decode()
-#                 href = f'<a href="data:file/txt;base64,{b64}" download="{report_path}">📄 Download Full Report</a>'
-#                 st.markdown(href, unsafe_allow_html=True)
-#         with st.expander("📜 Glossary Help"):
-#             for term, definition in GLOSSARY.items():
-#                 st.markdown(f"**{term.capitalize()}**: {definition}")
-# render_history()

 import os
 import re
 import fitz  # PyMuPDF
 import tempfile
 import base64
 from datetime import datetime
+from io import BytesIO
 from gtts import gTTS
 import streamlit as st
 from transformers import pipeline
     "Safe": {"Urdu": "محفوظ", "French": "Sûr"}
 }
+# ✅ In-memory history and audio state
 if "history" not in st.session_state:
     st.session_state.history = []
+if "current_audio" not in st.session_state:
+    st.session_state.current_audio = None
 # =======================
 # Streamlit UI
     <style>
     .report-container {
         border: 1px solid #ddd;
+        padding: 1.5rem;
+        border-radius: 15px;
+        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+        margin: 1rem 0;
     }
     .highlight {
         font-weight: bold;
         color: #d9534f;
+        background-color: #fff3cd;
+        padding: 2px 6px;
+        border-radius: 4px;
+    }
+    .audio-section {
+        background-color: #e8f4f8;
+        padding: 1rem;
+        border-radius: 10px;
+        border-left: 4px solid #17a2b8;
+        margin: 1rem 0;
+    }
+    .success-audio {
+        color: #155724;
+        background-color: #d4edda;
+        border: 1px solid #c3e6cb;
+        padding: 0.75rem;
+        border-radius: 0.375rem;
+        margin: 0.5rem 0;
+    }
+    .stAudio > div {
+        background-color: #f8f9fa;
+        border-radius: 10px;
+        padding: 0.5rem;
     }
     </style>
 """, unsafe_allow_html=True)
 def translate_label(threat_type):
     """Return threat_type translated into the module-level `language`, or threat_type itself when TRANSLATIONS has no entry."""
     per_language = TRANSLATIONS.get(threat_type, {})
     return per_language.get(language, threat_type)
+def text_to_speech(text, language_code='en'):
+    """Convert text to speech and return audio bytes"""
+    try:
+        # Map languages to gTTS language codes
+        lang_map = {
+            "English": "en",
+            "Urdu": "ur",
+            "French": "fr"
+        }
+        lang_code = lang_map.get(language_code, "en")
+        # Limit text length for better performance
+        if len(text) > 1000:
+            text = text[:1000] + "... (truncated for audio)"
+        # Create TTS object
+        tts = gTTS(text=text, lang=lang_code, slow=False)
+        # Use BytesIO to handle audio in memory
+        audio_buffer = BytesIO()
+        tts.write_to_fp(audio_buffer)
+        audio_buffer.seek(0)
+        return audio_buffer.getvalue()
+    except Exception as e:
+        st.error(f"❌ Audio generation failed: {str(e)}")
+        # Try with English as fallback
+        if lang_code != 'en':
+            try:
+                st.info("🔄 Trying with English language...")
+                tts = gTTS(text=text, lang='en', slow=False)
+                audio_buffer = BytesIO()
+                tts.write_to_fp(audio_buffer)
+                audio_buffer.seek(0)
+                return audio_buffer.getvalue()
+            except:
+                pass
+        return None
 def render_history():
     with st.expander("🕓 View Analysis History"):
 # =======================
 if clear_btn:
     st.session_state.history.clear()
+    if 'current_audio' in st.session_state:
+        st.session_state.current_audio = None
+    st.success("✅ History and audio cleared!")
 if analyze_btn:
     combined_text = text_input
         summary = ""
         if threat_type.lower() != "safe":
+            with st.expander("🧠 Semantic Reanalysis by LLaMA", expanded=True):
+                with st.spinner("🤖 Generating AI explanation..."):
+                    summary = semantic_analysis(combined_text)
                 st.write(summary)
+                # Enhanced Audio section
+                st.markdown("---")
+                st.markdown("### 🎧 Audio Explanation")
+                # Create audio content upfront to avoid regeneration
+                if 'current_audio' not in st.session_state:
+                    st.session_state.current_audio = None
+                col_audio1, col_audio2, col_audio3 = st.columns([1, 1, 2])
+                with col_audio1:
+                    if st.button("🎵 Generate Audio", key="gen_audio_btn", type="primary"):
+                        with st.spinner("🎵 Creating audio..."):
+                            st.session_state.current_audio = text_to_speech(summary, language)
+                            if st.session_state.current_audio:
+                                st.success("✅ Audio ready!")
+                            else:
+                                st.error("❌ Audio generation failed")
+                with col_audio2:
+                    if st.button("🔄 Refresh Audio", key="refresh_audio_btn"):
+                        st.session_state.current_audio = None
+                        st.info("🔄 Audio cleared. Click Generate Audio again.")
+                # Display audio player if audio is available
+                if st.session_state.current_audio:
+                    st.markdown('<div class="audio-section">', unsafe_allow_html=True)
+                    st.markdown("**🔊 Click play button below:**")
+                    st.audio(st.session_state.current_audio, format="audio/mp3")
+                    st.markdown("</div>", unsafe_allow_html=True)
+                else:
+                    st.info("🎵 Click 'Generate Audio' to hear the AI explanation")
         # Save history
         st.session_state.history.append({
                 st.markdown(f"**{term.capitalize()}**: {definition}")
 render_history()