"""Gradio chat app: DeepSeek-style assistant that can discuss uploaded files.

Supports OCR on images (pytesseract), text extraction from PDFs (PyPDF2),
and paragraph extraction from .docx files (python-docx).  Extracted text is
prepended to the user's prompt before streaming a completion from the
Hugging Face Inference API.
"""

import json
import os
from io import BytesIO

import docx
import gradio as gr
import PyPDF2
import pytesseract
import requests
from huggingface_hub import InferenceClient
from PIL import Image

# Initialize the inference client from the environment-provided token.
API_KEY = os.environ.get("HF_API_KEY")
client = InferenceClient(token=API_KEY)


def _upload_path(file):
    """Return the filesystem path of an upload.

    Gradio passes either a plain ``str`` path (``type="filepath"``) or a
    tempfile-like wrapper exposing ``.name`` (older API) — accept both.
    """
    return file if isinstance(file, str) else file.name


def process_file(file):
    """Extract text from an uploaded file.

    Parameters
    ----------
    file : str | tempfile wrapper | None
        The upload as delivered by the Gradio ``File`` component.

    Returns
    -------
    str
        Extracted text with a type-specific header, ``""`` when no file
        was given, or a short error/unsupported message.  Never raises:
        extraction errors are logged and reported as a string so the chat
        keeps working (deliberate best-effort behavior).
    """
    if file is None:
        return ""

    path = _upload_path(file)
    ext = path.rsplit('.', 1)[-1].lower()

    try:
        if ext in ('png', 'jpg', 'jpeg'):
            # OCR processing for images.
            image = Image.open(path)
            text = pytesseract.image_to_string(image)
            return f"IMAGE CONTENT:\n{text}"
        if ext == 'pdf':
            # PDF text extraction; extract_text() may return None for
            # image-only pages, so coalesce to "" before joining.
            pdf_reader = PyPDF2.PdfReader(path)
            text = "\n".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
            return f"PDF CONTENT:\n{text}"
        if ext == 'docx':
            # Word document processing (paragraph text only; tables and
            # headers/footers are not extracted).
            doc = docx.Document(path)
            text = "\n".join(para.text for para in doc.paragraphs)
            return f"DOCUMENT CONTENT:\n{text}"
        return "Unsupported file type"
    except Exception as e:
        print(f"File processing error: {e}")
        return "Error reading file"


def chat(message, history, file):
    """Stream a model response to *message*, enriched with any file content.

    Parameters
    ----------
    message : str
        The user's chat message.
    history : list
        Prior turns (supplied by ``gr.ChatInterface``; not used here).
    file : str | tempfile wrapper | None
        Optional upload whose extracted text is prepended to the prompt.

    Yields
    ------
    str
        The partial response, growing token by token for live streaming.
    """
    # Process the uploaded file (empty string when nothing was attached).
    file_content = process_file(file) if file else ""

    # Build the enhanced prompt combining document context and the message.
    full_prompt = f"""
    {file_content}

    User Message: {message}

    Please respond considering both the message and any attached documents:"""

    # Generation parameters for the hosted text-generation endpoint.
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=2000,
        top_p=0.95,
        repetition_penalty=1.2,
    )

    # Stream tokens, skipping special tokens (BOS/EOS markers etc.).
    stream = client.text_generation(
        full_prompt,
        stream=True,
        details=True,
        **generate_kwargs,
    )

    partial_message = ""
    for response in stream:
        if response.token.special:
            continue
        partial_message += response.token.text
        yield partial_message


# Build the Gradio interface with file-upload support.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# DeepSeek-R1 Assistant with File Support")
    gr.Markdown("Upload images, PDFs, or docs and chat about them!")

    with gr.Row():
        # type="filepath" is the valid value in current Gradio releases
        # (the original type="file" raises ValueError on construction);
        # the callback then receives a plain str path.
        file_input = gr.File(
            label="Upload File (PDF/Image/Doc)", type="filepath"
        )

    chatbot = gr.ChatInterface(
        fn=chat,
        additional_inputs=[file_input],
        examples=[
            ["Explain this document", "report.pdf"],
            ["What's in this image?", "screenshot.png"],
        ],
    )

demo.launch()