Spaces:

SlouchyBuffalo
/

Document

Running

App Files Community

SlouchyBuffalo committed on May 12, 2025

Commit

b6c73df

verified ·

1 Parent(s): e3567f9

Create app.py

Browse files

Files changed (1) hide show

app.py +130 -0

app.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import gradio as gr
+import spaces
+import PyPDF2
+import docx
+import io
+import os
+from typing import Optional
+from huggingface_hub import InferenceClient
+from prompts import SYSTEM_PROMPT, PROMPTS
+def extract_text_from_file(file) -> str:
+    """Extract text from uploaded files"""
+    if file is None:
+        return ""
+    file_path = file.name
+    text = ""
+    try:
+        if file_path.endswith('.pdf'):
+            with open(file_path, 'rb') as f:
+                reader = PyPDF2.PdfReader(f)
+                for page in reader.pages:
+                    text += page.extract_text() + "\n"
+        elif file_path.endswith('.docx'):
+            doc = docx.Document(file_path)
+            for paragraph in doc.paragraphs:
+                text += paragraph.text + "\n"
+        elif file_path.endswith('.txt'):
+            with open(file_path, 'r', encoding='utf-8') as f:
+                text = f.read()
+    except Exception as e:
+        return f"Error reading file: {str(e)}"
+    return text
+@spaces.GPU
+def process_document(document, operation_type, text_input):
+    """Main processing function using Cerebras Llama through HuggingFace"""
+    # Extract text from file or use text input
+    if document is not None:
+        text = extract_text_from_file(document)
+    else:
+        text = text_input
+    if not text.strip():
+        return "Please provide either a document or text input."
+    # Get the appropriate prompt
+    prompt = PROMPTS.get(operation_type, "")
+    # Create the client with Cerebras provider
+    try:
+        client = InferenceClient(
+            "meta-llama/Llama-3.3-70B-Instruct",
+            provider="cerebras",
+            token=os.getenv("HF_TOKEN"),
+        )
+        # Create conversation messages
+        messages = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": f"{prompt}\n\nDocument content:\n{text}"}
+        ]
+        # Generate response using chat completion
+        response = client.chat_completion(
+            messages=messages,
+            max_tokens=3000,
+            temperature=0.1,
+            stream=False
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        return f"Error: {str(e)}\n\nPlease ensure:\n1. HF_TOKEN is set in settings\n2. You have Pro access to use Cerebras inference\n3. The Cerebras/Llama integration is enabled in your account"
+# Create the Gradio interface
+# Components are declared inside nested Blocks/Row/Column contexts, so the
+# statement order below is the layout order: header text, a two-column input
+# row, the output box, then the tips.
+with gr.Blocks(title="Study Assistant", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📚 Study Assistant - Document Analysis Tool")
+    gr.Markdown("Upload a document or paste text, then select the type of analysis you want to perform.")
+    gr.Markdown("*Powered by Meta Llama-3.3-70B via Cerebras on HuggingFace*")
+    with gr.Row():
+        # Left column: the two alternative ways of supplying content.
+        with gr.Column():
+            # The accepted extensions mirror the branches handled in
+            # extract_text_from_file.
+            document = gr.File(
+                label="Upload Document",
+                file_types=[".pdf", ".docx", ".txt"],
+                file_count="single"
+            )
+            # process_document reads this only when no file is uploaded.
+            text_input = gr.Textbox(
+                label="Or paste text directly",
+                lines=5,
+                placeholder="Paste your text here if you don't want to upload a file..."
+            )
+        # Right column: operation selector and the trigger button.
+        with gr.Column():
+            # The selected value is used as the lookup key into PROMPTS by
+            # process_document. NOTE(review): presumably every choice has a
+            # matching key in prompts.PROMPTS; verify, since a missing key
+            # silently falls back to an empty instruction.
+            operation_type = gr.Dropdown(
+                choices=["Summary", "Outline", "Analysis", "Study Guide", "Table", "Questions"],
+                label="Select Operation",
+                value="Summary"
+            )
+            process_btn = gr.Button("🚀 Process Document", variant="primary", size="lg")
+    # Receives whatever string process_document returns, including its
+    # error messages.
+    output = gr.Textbox(
+        label="Output",
+        lines=20,
+        show_copy_button=True
+    )
+    gr.Markdown("---")
+    gr.Markdown("### Tips:")
+    gr.Markdown("- Supported formats: PDF, DOCX, TXT")
+    # NOTE(review): no file-size limit is configured in this file; confirm
+    # where the 200MB figure comes from.
+    gr.Markdown("- Maximum file size: 200MB")
+    gr.Markdown("- Text can be pasted directly if you don't have a file")
+    gr.Markdown("- Requires HuggingFace Pro account with Cerebras access")
+    # The inputs list is in the same order as process_document's parameters
+    # (document, operation_type, text_input).
+    process_btn.click(
+        fn=process_document,
+        inputs=[document, operation_type, text_input],
+        outputs=output,
+        show_progress=True
+    )
+# Start the app only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()