Spaces:
Build error
Build error
File size: 2,442 Bytes
0bd1aff 5b83243 e4f8319 441fbe3 5b83243 0bd1aff 2024002 0bd1aff 2024002 0bd1aff 2024002 0bd1aff 2024002 e4f8319 2024002 5b83243 e4f8319 2024002 e4f8319 2024002 e4f8319 2024002 5b83243 e4f8319 2024002 e4f8319 2024002 0bd1aff 2024002 5b83243 69b7f43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import os
import gradio as gr
from huggingface_hub import InferenceClient
import requests
import json
import pytesseract
from PIL import Image
import PyPDF2
from io import BytesIO
import docx
import ntpath
# Initialize clients
# Hugging Face token read from the Space's secrets; None when unset (the
# client then falls back to unauthenticated, rate-limited access).
API_KEY = os.environ.get("HF_API_KEY")
# Shared Inference API client used by chat() for streaming text generation.
client = InferenceClient(token=API_KEY)
def process_file(filepath):
    """Extract text from an uploaded file, labelled by source type.

    Supported inputs: PNG/JPEG images (OCR via pytesseract), PDFs
    (PyPDF2), and .docx documents (python-docx).

    Returns:
        "" when no path was supplied, a labelled text block on success,
        "Unsupported file type" for unknown extensions, or
        "Error reading file" when extraction raises.
    """
    if not filepath:
        return ""
    ext = os.path.splitext(filepath)[1].lower()
    try:
        if ext in ('.png', '.jpg', '.jpeg'):
            # Context manager closes the image handle deterministically
            # (the original left it to the GC).
            with Image.open(filepath) as image:
                text = pytesseract.image_to_string(image)
            return f"IMAGE CONTENT:\n{text}"
        elif ext == '.pdf':
            pdf_reader = PyPDF2.PdfReader(filepath)
            # extract_text() returns None for pages with no text layer
            # (e.g. scanned pages); substitute "" so join() never sees None.
            text = "\n".join(
                (page.extract_text() or "") for page in pdf_reader.pages
            )
            return f"PDF CONTENT:\n{text}"
        elif ext == '.docx':
            doc = docx.Document(filepath)
            text = "\n".join(para.text for para in doc.paragraphs)
            return f"DOCUMENT CONTENT:\n{text}"
        else:
            return "Unsupported file type"
    except Exception as e:
        # Best-effort: report a readable message instead of crashing the
        # chat handler that calls us.
        print(f"File processing error: {e}")
        return "Error reading file"
def chat(message, history, filepath):
    """Stream a model reply to *message*, optionally grounded in a file.

    Args:
        message: current user message (str).
        history: prior turns; expected in Gradio "messages" format
            ([{"role": ..., "content": ...}, ...]) since the ChatInterface
            is created with type="messages" — TODO confirm against caller.
        filepath: optional path of an uploaded file whose extracted text
            is prepended to the prompt.

    Yields:
        Progressively longer partial responses for streaming display.
    """
    file_content = process_file(filepath) if filepath else ""

    # Fold prior turns into the prompt so the model keeps conversation
    # context — the original built the prompt from `message` alone and
    # silently dropped `history`.
    transcript = "\n".join(
        f"{turn.get('role', 'user')}: {turn.get('content', '')}"
        for turn in (history or [])
        if isinstance(turn, dict)
    )

    full_prompt = f"""
{file_content}
{transcript}
User Message: {message}
Please respond considering both the message and any attached documents:
"""
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=2000,
        top_p=0.95,
        repetition_penalty=1.2,
    )

    # Generate response (token-by-token stream from the Inference API).
    stream = client.text_generation(
        full_prompt,
        stream=True,
        details=True,
        **generate_kwargs
    )

    partial_message = ""
    for response in stream:
        # Skip special tokens (BOS/EOS etc.) so they never reach the UI.
        if response.token.special:
            continue
        partial_message += response.token.text
        yield partial_message
# --- Gradio UI -------------------------------------------------------------
# ChatInterface wired to chat(); the file-upload widget is passed through as
# an additional input so every message carries the current upload's path.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# DeepSeek-R1 Assistant with File Support")
    gr.Markdown("Upload images, PDFs, or docs and chat about them!")
    with gr.Row():
        # type="filepath" hands chat() a path string (not file bytes),
        # matching what process_file() expects.
        file_input = gr.File(label="Upload File (PDF/Image/Doc)", type="filepath")
    chatbot = gr.ChatInterface(
        fn=chat,
        additional_inputs=[file_input],
        # "messages" format: history arrives as role/content dicts.
        type="messages",
        examples=[]
    )

demo.launch()
|