the-carnage committed on
Commit
bfc5e7a
·
1 Parent(s): 92751a2

Refactor: Switch from pipeline to direct model usage to resolve task registry errors

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
  from PIL import Image
4
  import pytesseract
5
  import pdfplumber
@@ -11,9 +11,11 @@ st.write("Summarize text, images, or PDFs with AI")
11
 
12
  @st.cache_resource
13
  def load_model():
14
- return pipeline("summarization", model="t5-small")
 
 
15
 
16
- summarizer = load_model()
17
 
18
  def extract_text_from_image(image):
19
  """Extract text from image using OCR"""
@@ -33,9 +35,10 @@ def summarize_text(text):
33
  """Summarize the given text"""
34
  if not text.strip():
35
  return None
36
- # input_text = "summarize: " + text[:4000]
37
- result = summarizer(text[:4000], max_length=150, min_length=40, do_sample=False)
38
- return result[0]["summary_text"]
 
39
 
40
  tab1, tab2, tab3 = st.tabs(["📝 Text", "🖼️ Image", "📄 PDF"])
41
 
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
  from PIL import Image
4
  import pytesseract
5
  import pdfplumber
 
11
 
12
  @st.cache_resource
13
  def load_model():
14
+ tokenizer = AutoTokenizer.from_pretrained("t5-small")
15
+ model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
16
+ return tokenizer, model
17
 
18
+ tokenizer, model = load_model()
19
 
20
  def extract_text_from_image(image):
21
  """Extract text from image using OCR"""
 
35
  """Summarize the given text"""
36
  if not text.strip():
37
  return None
38
+ input_text = "summarize: " + text[:4000]
39
+ inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
40
+ summary_ids = model.generate(inputs.input_ids, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
41
+ return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
42
 
43
  tab1, tab2, tab3 = st.tabs(["📝 Text", "🖼️ Image", "📄 PDF"])
44