File size: 2,442 Bytes
0bd1aff
 
5b83243
 
e4f8319
 
 
 
 
 
 
441fbe3
5b83243
 
0bd1aff
2024002
 
 
0bd1aff
2024002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bd1aff
2024002
 
 
 
 
0bd1aff
2024002
 
e4f8319
2024002
5b83243
e4f8319
2024002
e4f8319
2024002
 
 
 
e4f8319
2024002
 
5b83243
e4f8319
2024002
 
e4f8319
 
2024002
 
 
 
 
 
 
 
0bd1aff
2024002
5b83243
 
69b7f43
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97

import os
import gradio as gr
from huggingface_hub import InferenceClient
import requests
import json
import pytesseract
from PIL import Image
import PyPDF2
from io import BytesIO
import docx
import ntpath



# Initialize clients
API_KEY = os.environ.get("HF_API_KEY")
client = InferenceClient(token=API_KEY)

def process_file(filepath):
    """Extract text from an uploaded file and return it as a labeled string.

    Supported types (by extension): PNG/JPG/JPEG (OCR via pytesseract),
    PDF (PyPDF2 text extraction), and DOCX (python-docx paragraphs).

    Args:
        filepath: Path to the uploaded file, or None/"" when nothing was
            uploaded.

    Returns:
        "" for a missing path, a "<KIND> CONTENT:\\n..." string on success,
        "Unsupported file type" for unknown extensions, or
        "Error reading file" if extraction raised.
    """
    if not filepath:
        return ""

    ext = os.path.splitext(filepath)[1].lower()

    try:
        if ext in ('.png', '.jpg', '.jpeg'):
            image = Image.open(filepath)
            text = pytesseract.image_to_string(image)
            return f"IMAGE CONTENT:\n{text}"
        elif ext == '.pdf':
            pdf_reader = PyPDF2.PdfReader(filepath)
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned images); guard with `or ""` so join() doesn't
            # raise TypeError and wrongly report the whole PDF as unreadable.
            text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
            return f"PDF CONTENT:\n{text}"
        elif ext == '.docx':
            doc = docx.Document(filepath)
            text = "\n".join(para.text for para in doc.paragraphs)
            return f"DOCUMENT CONTENT:\n{text}"
        else:
            return "Unsupported file type"
    except Exception as e:
        # Best-effort: surface a generic message to the chat rather than crash.
        print(f"File processing error: {e}")
        return "Error reading file"

def chat(message, history, filepath):
    """Stream a model reply for *message*, folding in any uploaded file.

    Yields the growing partial response string so Gradio can render the
    answer token by token.
    """
    # Pull text out of the attachment, if one was provided.
    file_content = process_file(filepath) if filepath else ""

    full_prompt = f"""
    {file_content}
    
    User Message: {message}
    
    Please respond considering both the message and any attached documents:
    """

    # Sampling configuration for the generation call.
    params = {
        "temperature": 0.7,
        "max_new_tokens": 2000,
        "top_p": 0.95,
        "repetition_penalty": 1.2,
    }

    # Stream tokens from the Inference API, skipping special tokens
    # (BOS/EOS etc.) and yielding the accumulated text after each one.
    accumulated = ""
    token_stream = client.text_generation(
        full_prompt,
        stream=True,
        details=True,
        **params,
    )
    for chunk in token_stream:
        if chunk.token.special:
            continue
        accumulated += chunk.token.text
        yield accumulated

# Build the Gradio UI: a file-upload row plus a streaming chat interface.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# DeepSeek-R1 Assistant with File Support")
    gr.Markdown("Upload images, PDFs, or docs and chat about them!")
    
    with gr.Row():
        # type="filepath" makes Gradio pass a path string to chat(),
        # which is what process_file() expects.
        file_input = gr.File(label="Upload File (PDF/Image/Doc)", type="filepath")
        
    # ChatInterface forwards (message, history, filepath) to chat();
    # the file component is wired in as an additional input.
    chatbot = gr.ChatInterface(
        fn=chat,
        additional_inputs=[file_input],
        type="messages",
        examples=[]
    )

# Launches a local web server; blocks until the app is stopped.
demo.launch()