File size: 2,827 Bytes
5b83243
 
3e68be8
 
 
 
 
 
 
 
5b83243
3e68be8
 
 
5b83243
3e68be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b83243
3e68be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b83243
3e68be8
 
 
 
 
 
 
 
 
 
5b83243
3e68be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5b83243
3e68be8
5b83243
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import gradio as gr
from huggingface_hub import InferenceClient
import os
import requests
import json
import pytesseract
from PIL import Image
import PyPDF2
from io import BytesIO
import docx

# Initialize the Hugging Face Inference API client used by chat().
# NOTE(review): if HF_API_KEY is unset, token=None makes InferenceClient
# fall back to anonymous / locally cached credentials — confirm that is
# intended for deployment.
API_KEY = os.environ.get("HF_API_KEY")
client = InferenceClient(token=API_KEY)

def process_file(file):
    """Extract text content from an uploaded file (image, PDF, or DOCX).

    Args:
        file: An uploaded file object exposing a ``.name`` path attribute
            (as supplied by ``gr.File``), or ``None`` when nothing was
            uploaded.

    Returns:
        A labeled text string ("IMAGE CONTENT:", "PDF CONTENT:", or
        "DOCUMENT CONTENT:") with the extracted text, the string
        "Unsupported file type" for unknown extensions, "Error reading
        file" on extraction failure, or "" when ``file`` is ``None``.
    """
    if file is None:
        return ""

    # os.path.splitext handles filenames without an extension correctly;
    # the previous split('.')[-1] returned the whole filename in that case.
    ext = os.path.splitext(file.name)[1].lstrip('.').lower()

    try:
        if ext in ('png', 'jpg', 'jpeg'):
            # OCR processing for images via Tesseract.
            image = Image.open(file.name)
            text = pytesseract.image_to_string(image)
            return f"IMAGE CONTENT:\n{text}"

        elif ext == 'pdf':
            # extract_text() may return None for pages without a text
            # layer; substitute "" so join() doesn't raise TypeError.
            pdf_reader = PyPDF2.PdfReader(file.name)
            text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
            return f"PDF CONTENT:\n{text}"

        elif ext == 'docx':
            # Word document processing: concatenate paragraph text.
            doc = docx.Document(file.name)
            text = "\n".join(para.text for para in doc.paragraphs)
            return f"DOCUMENT CONTENT:\n{text}"

        else:
            return "Unsupported file type"

    except Exception as e:
        # Best-effort: surface a readable message to the chat instead of
        # crashing the request on a corrupt/unreadable file.
        print(f"File processing error: {e}")
        return "Error reading file"

def chat(message, history, file):
    """Stream a model response to *message*, grounded in an optional file.

    Args:
        message: The user's chat message.
        history: Chat history supplied by gr.ChatInterface (not used when
            building the prompt here).
        file: Optional uploaded file; its extracted text is prepended to
            the prompt via process_file().

    Yields:
        The partial response string, growing as each token streams in.
    """
    # Process the uploaded file (process_file also tolerates None).
    file_content = process_file(file) if file else ""

    # Build the prompt explicitly. The previous triple-quoted literal was
    # indented inside the function body, so every line sent to the model
    # carried four leading spaces.
    full_prompt = (
        f"{file_content}\n\n"
        f"User Message: {message}\n\n"
        "Please respond considering both the message and any attached documents:"
    )

    # Generation parameters for the Inference API.
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=2000,
        top_p=0.95,
        repetition_penalty=1.2,
    )

    # details=True is required so each streamed item exposes .token
    # metadata (text / special flag).
    stream = client.text_generation(
        full_prompt,
        stream=True,
        details=True,
        **generate_kwargs,
    )

    partial_message = ""
    for response in stream:
        # Skip special tokens (e.g. EOS) so they never reach the UI.
        if response.token.special:
            continue
        partial_message += response.token.text
        yield partial_message

# Create the Gradio interface with file upload support.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# DeepSeek-R1 Assistant with File Support")
    gr.Markdown("Upload images, PDFs, or docs and chat about them!")

    with gr.Row():
        # Gradio 4.x accepts type="filepath" or type="binary" only;
        # type="file" raises ValueError at construction time. "filepath"
        # matches process_file(), which opens the file by its path.
        file_input = gr.File(label="Upload File (PDF/Image/Doc)", type="filepath")

    chatbot = gr.ChatInterface(
        fn=chat,
        additional_inputs=[file_input],
        # NOTE(review): these example file paths must exist in the working
        # directory, or Gradio fails to resolve them — confirm the assets
        # ship with the app or drop the examples.
        examples=[
            ["Explain this document", "report.pdf"],
            ["What's in this image?", "screenshot.png"],
        ],
    )

demo.launch()