Shahbazakbar committed on
Commit
790a5eb
·
verified ·
1 Parent(s): 074e3e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -42
app.py CHANGED
@@ -1,7 +1,12 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
  import torch
 
 
 
 
 
3
 
4
- # Load DistilGPT-2 (lightweight and fast)
5
  distilgpt2_tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
6
  distilgpt2_model = AutoModelForCausalLM.from_pretrained("distilgpt2")
7
 
@@ -9,36 +14,6 @@ distilgpt2_model = AutoModelForCausalLM.from_pretrained("distilgpt2")
9
  if torch.cuda.is_available():
10
  distilgpt2_model = distilgpt2_model.to("cuda")
11
 
12
- def generate_response(prompt):
13
- # Tokenize the input prompt
14
- inputs = distilgpt2_tokenizer(prompt, return_tensors="pt").to(distilgpt2_model.device)
15
-
16
- # Generate the response
17
- outputs = distilgpt2_model.generate(**inputs, max_length=100)
18
-
19
- # Decode the response
20
- response = distilgpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
21
- return response
22
-
23
- import gradio as gr
24
-
25
- # Gradio interface
26
- def chatbot(prompt):
27
- response = generate_response(prompt)
28
- return response
29
-
30
- interface = gr.Interface(
31
- fn=chatbot,
32
- inputs="text",
33
- outputs="text",
34
- title="DistilGPT-2 Chatbot",
35
- description="Ask questions and get answers from DistilGPT-2!"
36
- )
37
-
38
- import fitz # PyMuPDF
39
- import easyocr
40
- from PIL import Image
41
-
42
  # Function to extract text from PDF
43
  def extract_text_from_pdf(pdf_path):
44
  doc = fitz.open(pdf_path)
@@ -54,36 +29,70 @@ def extract_text_from_image(image_path):
54
  extracted_text = " ".join([res[1] for res in results])
55
  return extracted_text
56
 
 
 
 
 
 
 
 
 
57
  def chatbot(input_type, input_data):
58
  if input_type == "Text":
59
  prompt = input_data
60
  elif input_type == "PDF":
 
 
61
  pdf_text = extract_text_from_pdf(input_data)
62
  prompt = f"Extracted text from PDF:\n{pdf_text}\n\nQuestion: {input_data}"
63
  elif input_type == "Image":
 
 
64
  image_text = extract_text_from_image(input_data)
65
  prompt = f"Extracted text from image:\n{image_text}\n\nQuestion: {input_data}"
66
  else:
67
  return "Invalid input type."
68
 
69
- # Generate response using the selected model
70
  response = generate_response(prompt)
71
  return response
72
 
73
- # List of input types
74
- INPUT_TYPES = ["Text", "PDF", "Image"]
75
-
76
  # Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  interface = gr.Interface(
78
  fn=chatbot,
79
- inputs=[
80
- gr.Dropdown(choices=INPUT_TYPES, label="Input Type"),
81
- gr.Textbox(lines=2, placeholder="Enter text or upload a file...", label="Input")
82
- ],
83
  outputs="text",
84
  title="Lightweight Chatbot with PDF and Image Support",
85
- description="Select the input type (Text, PDF, or Image) and ask your question!"
86
  )
87
 
88
- # Launch the app
89
  interface.launch()
 
1
+ import os
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import gradio as gr
5
+ import fitz # PyMuPDF
6
+ import easyocr
7
+ from PIL import Image
8
 
9
+ # Load a lightweight model (e.g., DistilGPT-2)
10
  distilgpt2_tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
11
  distilgpt2_model = AutoModelForCausalLM.from_pretrained("distilgpt2")
12
 
 
14
  if torch.cuda.is_available():
15
  distilgpt2_model = distilgpt2_model.to("cuda")
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # Function to extract text from PDF
18
  def extract_text_from_pdf(pdf_path):
19
  doc = fitz.open(pdf_path)
 
29
  extracted_text = " ".join([res[1] for res in results])
30
  return extracted_text
31
 
32
+ # Function to generate a response
33
def generate_response(prompt, max_new_tokens=100):
    """Generate a text continuation for ``prompt`` with the DistilGPT-2 model.

    Args:
        prompt: Text to continue. ``None``, empty, or whitespace-only prompts
            yield an empty string without invoking the model.
        max_new_tokens: Upper bound on the number of tokens generated *after*
            the prompt. Defaults to 100.

    Returns:
        The decoded sequence (prompt tail plus continuation) with special
        tokens stripped.
    """
    if not prompt or not prompt.strip():
        return ""

    encoded = distilgpt2_tokenizer(prompt, return_tensors="pt")

    # The prompt plus the generated tokens must fit in the model's context
    # window; extracted PDF/OCR text routinely exceeds it.
    context_limit = distilgpt2_model.config.max_position_embeddings
    new_token_cap = max(1, min(max_new_tokens, context_limit - 1))
    prompt_budget = context_limit - new_token_cap

    # Keep the *tail* of an over-long prompt so a trailing question survives.
    model_inputs = {
        name: tensor[:, -prompt_budget:].to(distilgpt2_model.device)
        for name, tensor in encoded.items()
    }

    with torch.no_grad():
        outputs = distilgpt2_model.generate(
            **model_inputs,
            # ``max_length`` would count the prompt tokens too, leaving no
            # room to generate once the prompt reaches that length.
            max_new_tokens=new_token_cap,
            # GPT-2 defines no pad token; reuse EOS for padding.
            pad_token_id=distilgpt2_tokenizer.eos_token_id,
        )

    return distilgpt2_tokenizer.decode(outputs[0], skip_special_tokens=True)
38
+
39
+ # Chatbot function to handle text, PDF, and image inputs
40
def _upload_to_path(upload):
    """Return the path of an uploaded file, or ``None`` if it is not a file."""
    if isinstance(upload, (str, os.PathLike)):
        candidate = upload
    else:
        # Some Gradio versions pass uploads as temp-file objects that expose
        # the on-disk location through ``.name``.
        candidate = getattr(upload, "name", None)
    if isinstance(candidate, (str, os.PathLike)) and os.path.isfile(candidate):
        return candidate
    return None


def _with_question(context, question):
    """Append a ``Question:`` section to ``context`` when a question is given."""
    if isinstance(question, str) and question.strip():
        return f"{context}\n\nQuestion: {question.strip()}"
    return context


def chatbot(input_type, input_data, pdf_file=None, image_file=None):
    """Answer a request given as plain text, a PDF, or an image.

    The function accepts both the two-argument form
    ``chatbot(input_type, input_data)`` and the four-argument form produced by
    an interface with separate text, PDF and image inputs.

    Args:
        input_type: One of ``"Text"``, ``"PDF"`` or ``"Image"``.
        input_data: The text prompt for ``"Text"``. For ``"PDF"``/``"Image"``
            it is the file to read when ``pdf_file``/``image_file`` is not
            supplied; otherwise it is treated as an optional question about
            the uploaded file.
        pdf_file: Optional uploaded PDF (path or temp-file object).
        image_file: Optional uploaded image (path or temp-file object).

    Returns:
        The generated response, or a short message when the input is missing
        or the input type is not recognised.
    """
    if input_type == "Text":
        if not input_data or not str(input_data).strip():
            return "Please enter some text."
        prompt = input_data
    elif input_type == "PDF":
        pdf_path = _upload_to_path(pdf_file if pdf_file is not None else input_data)
        if pdf_path is None:
            return "Please upload a PDF file."
        pdf_text = extract_text_from_pdf(pdf_path)
        # Only treat ``input_data`` as a question when it is not the file
        # itself; otherwise the file path would end up in the prompt.
        question = input_data if pdf_file is not None else None
        prompt = _with_question(f"Extracted text from PDF:\n{pdf_text}", question)
    elif input_type == "Image":
        image_path = _upload_to_path(
            image_file if image_file is not None else input_data
        )
        if image_path is None:
            return "Please upload an image file."
        image_text = extract_text_from_image(image_path)
        question = input_data if image_file is not None else None
        prompt = _with_question(
            f"Extracted text from image:\n{image_text}", question
        )
    else:
        return "Invalid input type."

    # Generate response using the model
    return generate_response(prompt)
59
 
 
 
 
60
# Gradio interface
def update_input_components(input_type):
    """Return visibility updates for the text, PDF and image inputs.

    Exactly one of the three inputs is made visible, matching ``input_type``
    (``"Text"``, ``"PDF"`` or ``"Image"``). Any other value hides all three.

    Returns:
        A list of three updates, in the order text box, PDF upload, image
        upload.
    """
    return [
        gr.update(visible=input_type == "Text"),
        gr.update(visible=input_type == "PDF"),
        gr.update(visible=input_type == "Image"),
    ]


def _run_chatbot(input_type, text_input, pdf_file, image_file):
    """Forward only the input matching ``input_type`` to ``chatbot``."""
    selected = {
        "Text": text_input,
        "PDF": pdf_file,
        "Image": image_file,
    }.get(input_type)
    # Uploads may arrive as temp-file objects exposing the path via ``.name``
    # (version dependent); plain strings are passed through unchanged.
    selected = getattr(selected, "name", selected)
    return chatbot(input_type, selected)


# ``gr.Blocks`` is used instead of ``gr.Interface`` because the visibility of
# the inputs has to react to the dropdown, which needs an event listener.
with gr.Blocks(title="Lightweight Chatbot with PDF and Image Support") as interface:
    gr.Markdown("# Lightweight Chatbot with PDF and Image Support")
    gr.Markdown(
        "Select the input type (Text, PDF, or Image) and provide your input."
    )

    input_components = [
        # Default to "Text" so the initially visible input matches the selection.
        gr.Dropdown(
            choices=["Text", "PDF", "Image"], value="Text", label="Input Type"
        ),
        gr.Textbox(
            lines=2, placeholder="Enter text...", label="Text Input", visible=True
        ),
        gr.File(label="Upload PDF", file_types=[".pdf"], visible=False),
        gr.Image(label="Upload Image", type="filepath", visible=False),
    ]
    input_type_dropdown, *togglable_inputs = input_components

    submit_button = gr.Button("Submit")
    output_box = gr.Textbox(label="Output")

    # Add dynamic visibility for input components
    input_type_dropdown.change(
        fn=update_input_components,
        inputs=input_type_dropdown,
        outputs=togglable_inputs,
    )
    submit_button.click(
        fn=_run_chatbot,
        inputs=input_components,
        outputs=output_box,
    )

# Launch the app
interface.launch()