King-8 committed on
Commit
cfc6eff
·
verified ·
1 Parent(s): 3740dcd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -24
app.py CHANGED
@@ -1,28 +1,18 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- from PIL import Image
4
- import pytesseract
5
- import shutil, os
6
 
7
- print("Checking tesseract path:", pytesseract.pytesseract.tesseract_cmd)
8
- print("Does it exist?", os.path.exists(pytesseract.pytesseract.tesseract_cmd))
 
9
 
10
- cache_dir = os.path.expanduser("~/.cache/huggingface")
11
- if os.path.exists(cache_dir):
12
- shutil.rmtree(cache_dir)
13
 
14
- # explicitly set path to tesseract
15
- pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
 
16
 
17
- # Load your trained model from Hugging Face Hub
18
- model_id = "King-8/flyer-event-extractor" # replace with your actual model repo
19
- extractor = pipeline("text2text-generation", model=model_id)
20
-
21
- def extract_event_details(flyer_text):
22
- """Extract structured details from flyer text using fine-tuned model"""
23
- output = extractor(f"Flyer text: {flyer_text}", max_length=128)[0]["generated_text"]
24
-
25
- # Try to parse into Event, Date, Location
26
  event, date, location = "", "", ""
27
  if "Event:" in output and "Date:" in output and "Location:" in output:
28
  try:
@@ -33,20 +23,20 @@ def extract_event_details(flyer_text):
33
  except:
34
  event = output
35
  else:
36
- event = output # fallback
37
 
38
  return event, date, location, output
39
 
40
  def process_image(image):
41
- """Run OCR on uploaded flyer image, then extract details with model"""
42
- flyer_text = pytesseract.image_to_string(Image.open(image))
43
- return extract_event_details(flyer_text)
44
 
45
  with gr.Blocks() as demo:
46
  gr.Markdown("## 📅 Flyer Event Extractor\nUpload a flyer image or paste text to extract event details.")
47
 
48
  with gr.Tab("Upload Flyer Image"):
49
- img_input = gr.Image(type="filepath", label="Upload Flyer Image")
50
  btn_img = gr.Button("Extract from Image")
51
  event_out_img = gr.Textbox(label="Event Name")
52
  date_out_img = gr.Textbox(label="Date & Time")
 
1
  import gradio as gr
2
  from transformers import pipeline
 
 
 
3
 
4
+ # Load your trained event extractor
5
+ event_model_id = "King-8/flyer-event-extractor"
6
+ event_extractor = pipeline("text2text-generation", model=event_model_id)
7
 
8
+ # Load an OCR model (image → text)
9
+ ocr_model = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
 
10
 
11
+ def extract_event_details(text):
12
+ """Extract event details from flyer text using your fine-tuned model"""
13
+ output = event_extractor(f"Flyer text: {text}", max_length=128)[0]["generated_text"]
14
 
15
+ # Try parsing into fields
 
 
 
 
 
 
 
 
16
  event, date, location = "", "", ""
17
  if "Event:" in output and "Date:" in output and "Location:" in output:
18
  try:
 
23
  except:
24
  event = output
25
  else:
26
+ event = output # fallback if parsing fails
27
 
28
  return event, date, location, output
29
 
30
  def process_image(image):
31
+ """OCR via Hugging Face model, then event extraction"""
32
+ ocr_text = ocr_model(image)[0]["generated_text"]
33
+ return extract_event_details(ocr_text)
34
 
35
  with gr.Blocks() as demo:
36
  gr.Markdown("## 📅 Flyer Event Extractor\nUpload a flyer image or paste text to extract event details.")
37
 
38
  with gr.Tab("Upload Flyer Image"):
39
+ img_input = gr.Image(type="pil", label="Upload Flyer Image")
40
  btn_img = gr.Button("Extract from Image")
41
  event_out_img = gr.Textbox(label="Event Name")
42
  date_out_img = gr.Textbox(label="Date & Time")