import io
import os
from io import BytesIO

import gradio as gr
import torch
from pdf2image import convert_from_bytes, convert_from_path
from PIL import Image
from transformers import AutoModel, AutoTokenizer, ViTFeatureExtractor, ViTModel
| |
|
| | |
# Custom CSS injected into the Gradio page; styles any element carrying the
# "button" class (Gradio does not apply this class automatically — it only
# takes effect on components given elem_classes=["button"]).
css = """
.button {
    padding: 10px 20px;
    background: #007BFF;
    color: white;
    border: none;
    cursor: pointer;
    font-size: 16px;
    margin: 10px;
}
"""
| |
|
| | |
# NOTE(review): `layout` is never referenced anywhere in this file — the
# gr.Interface below builds its own input/output components. Also,
# instantiating gr.Row outside a gr.Blocks context has no effect (and may
# warn/error depending on the Gradio version). Candidate for removal —
# confirm no other module imports it before deleting.
layout = [
    gr.Row([gr.File(label="Upload PDF", type="binary")]),
    gr.Row([gr.Button("Generate Insights")]),
    gr.Row([gr.Textbox("Placeholder for PDF insights", label="Insights", type="text")])
]
| |
|
| | |
def get_image_embeddings(image_path, model_name='google/vit-base-patch16-224'):
    """Return a mean-pooled ViT embedding for one image.

    Args:
        image_path: Path to an image file readable by PIL.
        model_name: HuggingFace model id of the ViT checkpoint to use.

    Returns:
        torch.Tensor of shape (1, hidden_size): patch-token hidden states
        averaged over the sequence dimension.
    """
    # NOTE: reloading the extractor/model on every call is slow; acceptable
    # for a demo, but worth caching if this runs per-page on large PDFs.
    feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)
    model = ViTModel.from_pretrained(model_name)
    model.eval()  # disable dropout etc. — this is pure inference

    # Force RGB so grayscale/RGBA/palette pages don't break the extractor,
    # which expects 3-channel input.
    image = Image.open(image_path).convert("RGB")
    inputs = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():  # no autograd graph needed for feature extraction
        outputs = model(**inputs)
    # Mean-pool the patch tokens into a single vector per image.
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings
| |
|
| | |
def pdf_to_images(pdf_file, img_dir):
    """Render each page of a PDF to a PNG file inside ``img_dir``.

    Args:
        pdf_file: A filesystem path, raw PDF ``bytes``/``bytearray``, or a
            binary file-like object (e.g. ``io.BytesIO``).
        img_dir: Directory for the page images; created if missing.

    Returns:
        list[str]: Paths of the saved PNGs, in page order. (New return
        value; previous callers that ignored the ``None`` result are
        unaffected.)
    """
    # BUG FIX: convert_from_path() only accepts paths, but the Gradio
    # handler passes a BytesIO — dispatch non-path inputs to
    # convert_from_bytes so uploads don't crash.
    if isinstance(pdf_file, (bytes, bytearray)):
        images = convert_from_bytes(bytes(pdf_file))
    elif hasattr(pdf_file, "read"):
        images = convert_from_bytes(pdf_file.read())
    else:
        images = convert_from_path(pdf_file)

    os.makedirs(img_dir, exist_ok=True)

    saved_paths = []
    for i, image in enumerate(images):
        # Pages are 1-indexed in the filenames for human readability.
        image_path = os.path.join(img_dir, f"page_{i + 1}.png")
        image.save(image_path, "PNG")
        saved_paths.append(image_path)

    print(f"Converted {len(images)} pages to images and saved in {img_dir}")
    return saved_paths
| |
|
| | |
def get_text_embeddings(text, model_name='bert-base-uncased'):
    """Return a mean-pooled BERT embedding for ``text``.

    Args:
        text: A string (or list of strings) to embed; truncated to 512 tokens.
        model_name: HuggingFace model id of the encoder checkpoint.

    Returns:
        torch.Tensor of shape (batch, hidden_size): token hidden states
        averaged over *real* tokens only (padding excluded via the
        attention mask). Identical to a plain mean for a single,
        unpadded input.
    """
    # NOTE: reloading on every call is slow; cache if used in a hot path.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    model.eval()  # pure inference — disable dropout

    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    with torch.no_grad():  # no gradients needed for feature extraction
        outputs = model(**inputs)

    # Masked mean: a plain .mean(dim=1) would average over [PAD] tokens
    # when batched inputs have different lengths.
    mask = inputs['attention_mask'].unsqueeze(-1).to(outputs.last_hidden_state.dtype)
    summed = (outputs.last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1)  # guard against divide-by-zero
    embeddings = summed / counts
    return embeddings
| |
|
| | |
def process_pdf_and_generate_response(pdf_file):
    """Gradio handler: PDF bytes in, placeholder insight string out.

    Pipeline: wrap the uploaded bytes in a stream, rasterize every page,
    embed each page image and a stub text, concatenate the embeddings,
    and return a canned response. Any failure is caught at this UI
    boundary and reported as a string instead of crashing the app.
    """
    try:
        pdf_stream = BytesIO(pdf_file)

        page_dir = "pdf_images"
        pdf_to_images(pdf_stream, page_dir)

        # One embedding per rendered page image.
        page_embeddings = [
            get_image_embeddings(os.path.join(page_dir, name))
            for name in os.listdir(page_dir)
            if name.endswith(".png")
        ]

        stub_text = "PDF content analysis placeholder"
        text_embeddings = get_text_embeddings(stub_text)

        # Combined tensor is built but not consumed yet — presumably a
        # hook for a future retrieval/QA step.
        combined_embeddings = torch.cat(page_embeddings + [text_embeddings], dim=0)
        response = "Response based on the processed PDF"
    except Exception as err:
        # UI boundary: surface the error as text rather than a traceback.
        response = f"An error occurred: {str(err)}"
    return response
| |
|
# Wire the handler into a single-input/single-output Gradio interface.
# The File component with type="binary" delivers the upload as raw bytes,
# which process_pdf_and_generate_response expects.
iface = gr.Interface(
    fn=process_pdf_and_generate_response,
    inputs=gr.File(label="Upload PDF", type="binary"),
    outputs=gr.Textbox("Placeholder for PDF insights", label="Insights", type="text"),
    title="pdf-chatbot",
    description="Upload a PDF and receive insights based on its content.",
    css=css
)

# Launch the local web server only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()
| |
|
| |
|