Spaces:

raksama19
/

DeepSeek-Test

Sleeping

App Files Files Community

raksa-the-wildcats committed on Jun 23, 2025

Commit

f8c0dab

1 Parent(s): d8024c0

first commit

Browse files

Files changed (9) hide show

README.md +21 -4
app.py +415 -0
knowledge_base.json +0 -0
pdf_processor.py +143 -0
requirements.txt +7 -0
utils/__init__.py +0 -0
utils/__pycache__/__init__.cpython-312.pyc +0 -0
utils/__pycache__/retriever.cpython-312.pyc +0 -0
utils/retriever.py +49 -0

README.md CHANGED Viewed

@@ -1,12 +1,29 @@
 ---
-title: DeepSeek Test
-emoji: 🐠
-colorFrom: blue
-colorTo: blue
 sdk: gradio
 sdk_version: 5.34.2
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Chatbot
+emoji: 🔥
+colorFrom: green
+colorTo: purple
 sdk: gradio
 sdk_version: 5.34.2
 app_file: app.py
 pinned: false
 ---
+# Web Accessibility Chatbot
+An AI-powered learning assistant for university students studying web accessibility, built with WebAIM resources and DeepSeek-R1.
+## Features
+- Answers based on authoritative WebAIM documentation
+- Proper source citations
+- Student-friendly explanations
+- Code examples and best practices
+- Assignment guidance
+## Setup
+1. Upload your WebAIM PDFs to the `pdfs/` directory
+2. Run the PDF processor (`pdf_processor.py`) to create the knowledge base file `knowledge_base.json`
+3. Set your Hugging Face token in the `HF_TOKEN` environment variable
+4. Deploy to Hugging Face Spaces
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,415 @@

+import gradio as gr
+import os
+from huggingface_hub import InferenceClient
+from utils.retriever import KnowledgeRetriever
+import json
+class AccessibilityChatbot:
+    """Chat assistant that answers accessibility questions from WebAIM excerpts.
+    Each question is matched against the local knowledge base through
+    ``KnowledgeRetriever``; the retrieved excerpts are appended to the system
+    prompt and the conversation is sent to DeepSeek-R1 through the Hugging
+    Face ``InferenceClient``.
+    """
+    def __init__(self):
+        # Initialize DeepSeek-R1 client; the token is read from the HF_TOKEN
+        # environment variable (None when it is unset).
+        self.client = InferenceClient(
+            model="deepseek-ai/DeepSeek-R1",
+            token=os.getenv("HF_TOKEN")
+        )
+        # Initialize knowledge retriever
+        self.retriever = KnowledgeRetriever()
+        # System prompt for accessibility education
+        self.system_prompt = """You are an expert web accessibility instructor helping university students learn about web accessibility.
+Your knowledge comes from WebAIM resources, which are authoritative sources for web accessibility information.
+Guidelines for responses:
+1. Provide clear, student-friendly explanations
+2. Use the provided WebAIM context to answer questions accurately
+3. Always cite your sources by mentioning the WebAIM document and page number
+4. Include practical examples and code snippets when relevant
+5. Break down complex concepts into digestible parts
+6. Encourage best practices and standards compliance
+7. If asked about assignments, provide actionable guidance
+Remember: You're teaching students, so be encouraging and educational while maintaining accuracy."""
+    def generate_response(self, message, history):
+        """Generate a reply to ``message`` using DeepSeek-R1 with WebAIM context.
+        Args:
+            message: The user's current question.
+            history: Earlier turns as ``(user_text, assistant_text)`` pairs;
+                ``None`` or an empty list means there are no earlier turns.
+        Returns:
+            The model's answer, followed by a "Sources" list when excerpts
+            were retrieved, or an apology string when no answer was produced
+            or any step raised an exception.
+        """
+        try:
+            # Retrieval sits inside the try block so that a retriever failure
+            # produces the same apology message as a failed model call
+            # instead of an unhandled exception in the UI callback.
+            relevant_content = self.retriever.retrieve_relevant_content(message)
+            context = self.retriever.format_context_for_llm(relevant_content)
+            # Prepare messages for the LLM
+            messages = [
+                {"role": "system", "content": f"{self.system_prompt}\n\nContext from WebAIM resources:\n{context}"}
+            ]
+            # Add conversation history, skipping empty halves of a turn so
+            # that no message with missing content is sent to the API.
+            for human, assistant in history or []:
+                if human:
+                    messages.append({"role": "user", "content": human})
+                if assistant:
+                    messages.append({"role": "assistant", "content": assistant})
+            # Add current message
+            messages.append({"role": "user", "content": message})
+            response = self.client.chat_completion(
+                messages=messages,
+                max_tokens=1500,
+                temperature=0.7,
+                top_p=0.9
+            )
+            assistant_response = response.choices[0].message.content
+            # Add source information
+            if relevant_content and assistant_response:
+                sources = self.format_sources(relevant_content)
+                assistant_response += f"\n\n**Sources:**\n{sources}"
+            return assistant_response or "I apologize, but I couldn't generate a response. Please try again."
+        except Exception as e:
+            # Top-level boundary of the chat callback: report the failure in
+            # the chat instead of raising.
+            return f"I apologize, but I'm experiencing technical difficulties. Please try again. Error: {str(e)}"
+    def format_sources(self, content_list):
+        """Format source citations for display.
+        Args:
+            content_list: Retrieved items, each with ``source_file`` and
+                ``page_number`` keys.
+        Returns:
+            One bullet line per distinct (file, page) pair, in first-seen
+            order, joined with newlines.
+        """
+        sources = []
+        seen_sources = set()
+        for item in content_list:
+            # A tuple key cannot collide the way a concatenated string could.
+            source_key = (item['source_file'], item['page_number'])
+            if source_key not in seen_sources:
+                sources.append(f"• {item['source_file']} (Page {item['page_number']})")
+                seen_sources.add(source_key)
+        return "\n".join(sources)
+# Initialize the shared chatbot at import time. Constructing it creates the
+# InferenceClient and the KnowledgeRetriever (see AccessibilityChatbot.__init__);
+# the callbacks defined in create_interface() use this module-level instance.
+chatbot = AccessibilityChatbot()
+# Create Gradio interface
+def create_interface():
+    """Build the Gradio Blocks UI for the accessibility assistant.
+    The page consists of a header, feature cards, the chat area, the question
+    input with its buttons, example questions, two resource accordions and a
+    footer. Submitting a question calls the module-level ``chatbot``.
+    Returns:
+        The assembled ``gr.Blocks`` object; it is not launched here.
+    """
+    # Custom CSS for improved styling
+    custom_css = """
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: 0 auto !important;
+    }
+    .main-header {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        color: white;
+        padding: 2rem;
+        border-radius: 15px;
+        margin-bottom: 2rem;
+        text-align: center;
+        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
+    }
+    .main-header h1 {
+        margin: 0;
+        font-size: 2.5rem;
+        font-weight: 700;
+        text-shadow: 0 2px 4px rgba(0,0,0,0.3);
+    }
+    .main-header p {
+        margin: 1rem 0 0 0;
+        font-size: 1.1rem;
+        opacity: 0.9;
+    }
+    .feature-grid {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+        gap: 1rem;
+        margin: 2rem 0;
+    }
+    .feature-card {
+        background: white;
+        padding: 1.5rem;
+        border-radius: 12px;
+        border: 1px solid #e1e5e9;
+        box-shadow: 0 4px 6px rgba(0,0,0,0.05);
+        transition: transform 0.2s, box-shadow 0.2s;
+    }
+    .feature-card:hover {
+        transform: translateY(-2px);
+        box-shadow: 0 8px 25px rgba(0,0,0,0.1);
+    }
+    .feature-card h3 {
+        color: #667eea;
+        margin: 0 0 0.5rem 0;
+        font-size: 1.2rem;
+    }
+    .chat-container {
+        background: white;
+        border-radius: 15px;
+        padding: 2rem;
+        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
+        border: 1px solid #e1e5e9;
+    }
+    .input-container {
+        background: #f8f9fa;
+        border-radius: 12px;
+        padding: 1.5rem;
+        margin-top: 1rem;
+    }
+    .examples-section {
+        background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
+        color: white;
+        padding: 2rem;
+        border-radius: 15px;
+        margin: 2rem 0;
+    }
+    .examples-section h3 {
+        margin: 0 0 1rem 0;
+        font-size: 1.5rem;
+    }
+    .resources-section {
+        background: #f8f9fa;
+        border-radius: 15px;
+        padding: 2rem;
+        margin: 2rem 0;
+        border: 1px solid #e1e5e9;
+    }
+    .footer {
+        text-align: center;
+        padding: 2rem;
+        color: #6c757d;
+        border-top: 1px solid #e1e5e9;
+        margin-top: 2rem;
+    }
+    .gradio-button {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
+        border: none !important;
+        border-radius: 8px !important;
+        color: white !important;
+        font-weight: 600 !important;
+        padding: 12px 24px !important;
+        transition: all 0.3s ease !important;
+    }
+    .gradio-button:hover {
+        transform: translateY(-2px) !important;
+        box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4) !important;
+    }
+    .gradio-textbox {
+        border-radius: 12px !important;
+        border: 2px solid #e1e5e9 !important;
+        transition: border-color 0.3s ease !important;
+    }
+    .gradio-textbox:focus-within {
+        border-color: #667eea !important;
+        box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
+    }
+    .chatbot-container {
+        border-radius: 12px !important;
+        border: 1px solid #e1e5e9 !important;
+        background: white !important;
+    }
+    """
+    with gr.Blocks(
+        title="Web Accessibility Learning Assistant",
+        css=custom_css
+    ) as demo:
+        # Header
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.HTML("""
+                <div class="main-header">
+                    <h1>🌐 Web Accessibility Learning Assistant</h1>
+                    <p>Your personal tutor for mastering web accessibility using authoritative WebAIM resources</p>
+                </div>
+                """)
+        # Feature highlights
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.HTML("""
+                <div class="feature-grid">
+                    <div class="feature-card">
+                        <h3>📋 WCAG Guidelines</h3>
+                        <p>Master success criteria and implementation strategies with expert guidance</p>
+                    </div>
+                    <div class="feature-card">
+                        <h3>🔍 Screen Reader Testing</h3>
+                        <p>Learn how to test with assistive technologies like NVDA and JAWS</p>
+                    </div>
+                    <div class="feature-card">
+                        <h3>💻 Code Examples</h3>
+                        <p>Get practical HTML, CSS, and JavaScript patterns for accessibility</p>
+                    </div>
+                    <div class="feature-card">
+                        <h3>🎯 Best Practices</h3>
+                        <p>Discover real-world accessibility solutions and common pitfalls</p>
+                    </div>
+                </div>
+                """)
+        # Main chat interface
+        with gr.Row():
+            # The styling class is attached to the column itself: separate
+            # gr.HTML components cannot open and close a <div> around other
+            # components, so wrapper markup would never enclose the chatbot.
+            with gr.Column(scale=1, elem_classes=["chat-container"]):
+                # NOTE(review): bubble_full_width is reported as deprecated by
+                # recent Gradio releases — confirm against the pinned SDK
+                # version before removing it.
+                chatbot_interface = gr.Chatbot(
+                    height=600,
+                    placeholder="👋 Ask me anything about web accessibility! I'm here to help you learn.",
+                    show_label=False,
+                    container=True,
+                    bubble_full_width=False,
+                    elem_classes=["chatbot-container"]
+                )
+        # Input section
+        with gr.Row():
+            # Same approach as the chat area: style the column directly.
+            with gr.Column(scale=1, elem_classes=["input-container"]):
+                msg = gr.Textbox(
+                    placeholder="Type your question here... (e.g., 'How do I write good alt text?' or 'What are the WCAG contrast requirements?')",
+                    label="Your Question",
+                    lines=3,
+                    max_lines=6,
+                    elem_classes=["gradio-textbox"]
+                )
+                with gr.Row():
+                    clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary", size="sm")
+                    submit_btn = gr.Button("🚀 Ask Question", variant="primary", size="lg")
+        # Quick start examples
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.HTML("""
+                <div class="examples-section">
+                    <h3>🚀 Quick Start Examples</h3>
+                    <p>Click any example below to get started with common accessibility questions:</p>
+                </div>
+                """)
+                gr.Examples(
+                    examples=[
+                        "What are the WCAG 2.1 AA requirements for color contrast?",
+                        "How do I make forms accessible to screen readers?",
+                        "What's the difference between aria-label and aria-labelledby?",
+                        "How can I test my website with a screen reader?",
+                        "What are the most common accessibility mistakes students make?",
+                        "How do I write effective alt text for complex images?",
+                        "What ARIA roles should I use for a navigation menu?",
+                        "How do I make data tables accessible?",
+                        "What are the keyboard navigation requirements?",
+                        "How do I ensure my site works without JavaScript?"
+                    ],
+                    inputs=msg,
+                    examples_per_page=5,
+                    label="Example Questions"
+                )
+        # Additional resources
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.HTML("""
+                <div class="resources-section">
+                    <h3>📚 Additional Learning Resources</h3>
+                </div>
+                """)
+                with gr.Accordion("🛠️ Recommended Tools", open=False):
+                    gr.Markdown("""
+                    ### Essential Accessibility Testing Tools:
+                    **🔍 Automated Testing:**
+                    - **WAVE**: Web accessibility evaluation tool (wave.webaim.org)
+                    - **axe DevTools**: Browser extension for accessibility testing
+                    - **Lighthouse**: Built-in accessibility audit in Chrome DevTools
+                    - **HTML_CodeSniffer**: Bookmarklet for quick accessibility checks
+                    **🎧 Screen Readers:**
+                    - **NVDA**: Free screen reader for Windows
+                    - **JAWS**: Professional screen reader (paid)
+                    - **VoiceOver**: Built-in screen reader for macOS
+                    - **TalkBack**: Android screen reader
+                    **🎨 Color & Contrast:**
+                    - **WebAIM Contrast Checker**: Verify color contrast ratios
+                    - **Color Oracle**: Simulate color blindness
+                    - **Stark**: Design tool with accessibility features
+                    """)
+                with gr.Accordion("📋 Key Standards & Guidelines", open=False):
+                    gr.Markdown("""
+                    ### Web Accessibility Standards:
+                    **🌐 WCAG 2.1:**
+                    - **Level A**: Basic accessibility requirements
+                    - **Level AA**: Standard compliance (most common target)
+                    - **Level AAA**: Highest level of accessibility
+                    **🇺🇸 US Standards:**
+                    - **Section 508**: Federal accessibility requirements
+                    - **ADA**: Americans with Disabilities Act considerations
+                    - **CVAA**: 21st Century Communications and Video Accessibility Act
+                    **🌍 International:**
+                    - **EN 301 549**: European accessibility standard
+                    - **ISO 9241-171**: International ergonomics standard
+                    """)
+        # Footer
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.HTML("""
+                <div class="footer">
+                    <p><strong>This chatbot uses authoritative WebAIM resources and is powered by DeepSeek-R1.</strong></p>
+                    <p>For the most up-to-date information, always refer to the original WebAIM documentation at <a href="https://webaim.org" target="_blank">webaim.org</a></p>
+                </div>
+                """)
+        # Handle message submission
+        def respond(message, history):
+            """Answer ``message`` and return ``(updated_history, "")``.
+            The empty string is written back to the textbox to clear it.
+            Blank input leaves the conversation unchanged.
+            """
+            # Work on a copy: the incoming value may be None, and the list
+            # received from the component should not be mutated in place.
+            history = list(history or [])
+            if not message or not message.strip():
+                return history, ""
+            response = chatbot.generate_response(message, history)
+            history.append((message, response))
+            return history, ""
+        def clear_chat():
+            """Reset the conversation and the textbox."""
+            return [], ""
+        # Event handlers: Enter in the textbox and the submit button share
+        # the same callback.
+        msg.submit(respond, [msg, chatbot_interface], [chatbot_interface, msg])
+        submit_btn.click(respond, [msg, chatbot_interface], [chatbot_interface, msg])
+        clear_btn.click(clear_chat, outputs=[chatbot_interface, msg])
+    return demo
+# Launch the app
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True
+    )

knowledge_base.json ADDED Viewed

The diff for this file is too large to render. See raw diff

pdf_processor.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import fitz  # PyMuPDF
+import json
+import os
+import re
+from sentence_transformers import SentenceTransformer
+import pickle
+class PDFProcessor:
+    def __init__(self, pdf_directory="/Users/maraksa/Downloads/chatbot/WebAIM/"):
+        self.pdf_directory = pdf_directory
+        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
+        # Check if directory exists
+        if not os.path.exists(pdf_directory):
+            os.makedirs(pdf_directory)
+            print(f"Created directory: {pdf_directory}")
+            print("Please add your WebAIM PDF files to this directory.")
+    def clean_text(self, text):
+        """Clean extracted text from PDF"""
+        # Remove extra whitespace and line breaks
+        text = re.sub(r'\s+', ' ', text)
+        # Remove common PDF artifacts
+        text = re.sub(r'Page \d+ of \d+', '', text)
+        text = re.sub(r'WebAIM.*?\n', '', text)
+        return text.strip()
+    def extract_text_from_pdf(self, pdf_path):
+        """Extract text from PDF with page information"""
+        print(f"Processing: {os.path.basename(pdf_path)}")
+        doc = fitz.open(pdf_path)
+        pages_content = []
+        for page_num in range(len(doc)):
+            page = doc[page_num]
+            text = page.get_text()
+            # Clean the text
+            cleaned_text = self.clean_text(text)
+            # Skip pages with very little content
+            if len(cleaned_text) < 50:
+                continue
+            # Clean and chunk text
+            chunks = self.chunk_text(cleaned_text, chunk_size=500)
+            for chunk_idx, chunk in enumerate(chunks):
+                if len(chunk.strip()) > 30:  # Only keep substantial chunks
+                    pages_content.append({
+                        'text': chunk,
+                        'source_file': os.path.basename(pdf_path),
+                        'page_number': page_num + 1,
+                        'chunk_id': chunk_idx,
+                        'source_type': 'WebAIM'
+                    })
+        doc.close()
+        print(f"✅ Extracted {len(pages_content)} chunks from {os.path.basename(pdf_path)}")
+        return pages_content
+    def chunk_text(self, text, chunk_size=500, overlap=50):
+        """Split text into overlapping chunks"""
+        words = text.split()
+        chunks = []
+        for i in range(0, len(words), chunk_size - overlap):
+            chunk = ' '.join(words[i:i + chunk_size])
+            if chunk.strip():
+                chunks.append(chunk.strip())
+        return chunks
+    def process_all_pdfs(self):
+        """Process all PDFs in the directory"""
+        all_content = []
+        # Check if PDFs exist
+        pdf_files = [f for f in os.listdir(self.pdf_directory) if f.endswith('.pdf')]
+        if not pdf_files:
+            print(f"❌ No PDF files found in {self.pdf_directory}")
+            print("Please add your WebAIM PDF files to the pdfs/ directory")
+            return []
+        print(f"Found {len(pdf_files)} PDF files:")
+        for pdf_file in pdf_files:
+            print(f"  - {pdf_file}")
+        for filename in pdf_files:
+            pdf_path = os.path.join(self.pdf_directory, filename)
+            try:
+                content = self.extract_text_from_pdf(pdf_path)
+                all_content.extend(content)
+            except Exception as e:
+                print(f"❌ Error processing {filename}: {str(e)}")
+        return all_content
+    def create_knowledge_base(self, output_path="knowledge_base.json"):
+        """Create searchable knowledge base from PDFs"""
+        print("🚀 Starting PDF processing...")
+        all_content = self.process_all_pdfs()
+        if not all_content:
+            print("❌ No content extracted. Please check your PDF files.")
+            return None
+        print(f"📄 Total chunks extracted: {len(all_content)}")
+        print("🧠 Creating embeddings... (this may take a few minutes)")
+        texts = [item['text'] for item in all_content]
+        embeddings = self.embedder.encode(texts, show_progress_bar=True)
+        # Save knowledge base
+        knowledge_base = {
+            'content': all_content,
+            'embeddings': embeddings.tolist(),
+            'metadata': {
+                'total_chunks': len(all_content),
+                'embedding_model': 'all-MiniLM-L6-v2',
+                'chunk_size': 500,
+                'overlap': 50
+            }
+        }
+        with open(output_path, 'w') as f:
+            json.dump(knowledge_base, f, indent=2)
+        print(f"✅ Knowledge base saved to {output_path}")
+        print(f"📊 Summary:")
+        print(f"   - Total chunks: {len(all_content)}")
+        print(f"   - Embedding dimensions: {len(embeddings[0])}")
+        print(f"   - File size: {os.path.getsize(output_path) / 1024 / 1024:.2f} MB")
+        return knowledge_base
+# Usage: running this file as a script builds the knowledge base from the
+# processor's default PDF directory and writes it to the default output path
+# of create_knowledge_base().
+if __name__ == "__main__":
+    processor = PDFProcessor()
+    knowledge_base = processor.create_knowledge_base()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio>=4.0.0
+huggingface_hub>=0.20.0
+sentence-transformers>=2.2.0
+scikit-learn>=1.3.0
+numpy>=1.24.0
+PyMuPDF>=1.23.0
+python-dotenv>=1.0.0

utils/__init__.py ADDED Viewed

File without changes

utils/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (147 Bytes). View file

utils/__pycache__/retriever.cpython-312.pyc ADDED Viewed

Binary file (2.74 kB). View file

utils/retriever.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import json
+import numpy as np
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+class KnowledgeRetriever:
+    """Semantic search over a JSON knowledge base of embedded text chunks.
+    The knowledge base is expected to hold a ``content`` list of chunk dicts
+    and a parallel ``embeddings`` list; an optional ``metadata`` dict may
+    name the embedding model that produced the vectors.
+    """
+    def __init__(self, knowledge_base_path="knowledge_base.json"):
+        # Load knowledge base
+        with open(knowledge_base_path, 'r', encoding='utf-8') as f:
+            self.kb = json.load(f)
+        self.content = self.kb['content']
+        self.embeddings = np.array(self.kb['embeddings'])
+        # Encode queries with the model recorded in the knowledge base so
+        # that query vectors and stored vectors stay comparable; fall back
+        # to the default model when no metadata is present.
+        model_name = self.kb.get('metadata', {}).get('embedding_model', 'all-MiniLM-L6-v2')
+        self.embedder = SentenceTransformer(model_name)
+    def retrieve_relevant_content(self, query, top_k=5, min_similarity=0.3):
+        """Retrieve most relevant content for the query.
+        Args:
+            query: Text to search for.
+            top_k: Maximum number of results; values <= 0 yield no results.
+            min_similarity: Minimum cosine similarity a result must reach.
+        Returns:
+            Copies of the matching content items, best match first, each
+            with an added ``similarity_score`` float. The list is empty when
+            nothing qualifies or the knowledge base holds no embeddings.
+        """
+        # Guard top_k explicitly: a slice of [-0:] would select every row.
+        # An empty knowledge base yields a non-2D array that cannot be
+        # compared against the query.
+        if top_k <= 0 or len(self.content) == 0 or self.embeddings.ndim != 2:
+            return []
+        # Encode query
+        query_embedding = self.embedder.encode([query])
+        # Calculate similarities
+        similarities = cosine_similarity(query_embedding, self.embeddings)[0]
+        # Get top results above threshold, highest similarity first
+        top_indices = np.argsort(similarities)[-top_k:][::-1]
+        return [
+            {**self.content[idx], 'similarity_score': float(similarities[idx])}
+            for idx in top_indices
+            if similarities[idx] >= min_similarity
+        ]
+    def format_context_for_llm(self, relevant_content):
+        """Format retrieved content for LLM context.
+        Args:
+            relevant_content: Items with ``source_file``, ``page_number``
+                and ``text`` keys.
+        Returns:
+            A numbered listing of the sources with their text, or a fixed
+            "no relevant information" sentence for an empty list.
+        """
+        if not relevant_content:
+            return "No relevant information found in WebAIM resources."
+        parts = ["Relevant information from WebAIM resources:\n\n"]
+        for i, item in enumerate(relevant_content, 1):
+            parts.append(f"[Source {i}] From {item['source_file']} (Page {item['page_number']}):\n")
+            parts.append(f"{item['text']}\n\n")
+        return "".join(parts)