girishwangikar committed on
Commit
a914df4
·
verified ·
1 Parent(s): 10be217

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -46
app.py CHANGED
@@ -12,6 +12,7 @@ groq_api_key = os.environ.get('GROQ_API_KEY')
12
 
13
  # Set up LLM
14
  llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=groq_api_key)
 
15
  def extract_text_from_pdf(pdf_file):
16
  pdf_reader = PyPDF2.PdfReader(pdf_file)
17
  text = ""
@@ -21,36 +22,39 @@ def extract_text_from_pdf(pdf_file):
21
 
22
  def chunk_text(text):
23
  text_splitter = RecursiveCharacterTextSplitter(
24
- chunk_size=4000,
25
- chunk_overlap=400,
26
  length_function=len
27
  )
28
  chunks = text_splitter.split_text(text)
29
  return [Document(page_content=chunk) for chunk in chunks]
30
 
31
- def summarize_chunks(chunks):
32
- # Prompt for the initial summarization of each chunk
33
- map_prompt_template = """Write a detailed summary of the following text:
34
- "{text}"
35
- DETAILED SUMMARY:"""
 
 
 
 
 
 
 
 
 
36
  map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])
37
-
38
- # Prompt for combining the summaries
39
- combine_prompt_template = """Write a comprehensive summary of the following text, capturing key points and main ideas:
40
- "{text}"
41
- COMPREHENSIVE SUMMARY:"""
42
  combine_prompt = PromptTemplate(template=combine_prompt_template, input_variables=["text"])
43
-
44
- # Check the total length of the chunks
45
  total_length = sum(len(chunk.page_content) for chunk in chunks)
46
-
47
- if total_length < 10000: # For shorter documents
48
  chain = load_summarize_chain(
49
- llm,
50
- chain_type="stuff",
51
  prompt=combine_prompt
52
  )
53
- else: # For longer documents
54
  chain = load_summarize_chain(
55
  llm,
56
  chain_type="map_reduce",
@@ -58,59 +62,61 @@ def summarize_chunks(chunks):
58
  combine_prompt=combine_prompt,
59
  verbose=True
60
  )
61
-
62
  summary = chain.run(chunks)
63
  return summary
64
 
65
- def summarize_content(pdf_file, text_input):
66
  if pdf_file is None and not text_input:
67
  return "Please upload a PDF file or enter text to summarize."
68
-
69
  if pdf_file is not None:
70
  # Extract text from PDF
71
  text = extract_text_from_pdf(pdf_file)
72
  else:
73
  # Use the input text
74
  text = text_input
75
-
76
  # Chunk the text
77
  chunks = chunk_text(text)
78
-
79
- # Summarize chunks
80
- final_summary = summarize_chunks(chunks)
81
  return final_summary
82
 
83
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
84
  gr.Markdown(
85
- """
86
- # PDF And Text Summarizer
87
- ### Advanced PDF and Text Summarization -
88
-
89
- Upload your PDF document or enter text directly, and let AI generate a concise, informative summary.
90
- """
91
  )
92
-
93
  with gr.Row():
94
  with gr.Column(scale=1):
95
  input_pdf = gr.File(label="Upload PDF (optional)", file_types=[".pdf"])
96
  input_text = gr.Textbox(label="Or enter text here", lines=5, placeholder="Paste or type your text here...")
 
97
  submit_btn = gr.Button("Generate Summary", variant="primary")
98
-
99
  with gr.Column(scale=2):
100
  output = gr.Textbox(label="Generated Summary", lines=10)
101
-
102
  gr.Markdown(
103
- """
104
- ### How it works
105
- 1. Upload a PDF file or enter text directly
106
- 2. Click "Generate Summary"
107
- 3. Wait for the AI to process and summarize your content
108
- 4. Review the generated summary
109
-
110
- *Powered by LLAMA 3.1 8B model and LangChain*
111
- """
 
 
112
  )
113
-
114
- submit_btn.click(summarize_content, inputs=[input_pdf, input_text], outputs=output)
115
 
116
  iface.launch()
 
12
 
13
  # Set up LLM
14
  llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=groq_api_key)
15
+
16
  def extract_text_from_pdf(pdf_file):
17
  pdf_reader = PyPDF2.PdfReader(pdf_file)
18
  text = ""
 
22
 
23
  def chunk_text(text):
24
  text_splitter = RecursiveCharacterTextSplitter(
25
+ chunk_size=4000,
26
+ chunk_overlap=400,
27
  length_function=len
28
  )
29
  chunks = text_splitter.split_text(text)
30
  return [Document(page_content=chunk) for chunk in chunks]
31
 
32
+ def summarize_chunks(chunks, conciseness):
33
+ # Adjust the prompts based on the conciseness level
34
+ map_prompt_template = f"""Write a {'very concise' if conciseness > 0.7 else 'detailed'} summary of the following text, focusing on the {'most crucial' if conciseness > 0.7 else 'key'} points:
35
+
36
+ "{{text}}"
37
+
38
+ {'CONCISE' if conciseness > 0.7 else 'DETAILED'} SUMMARY:"""
39
+
40
+ combine_prompt_template = f"""Write a {'highly condensed' if conciseness > 0.7 else 'comprehensive'} summary of the following text, capturing the {'essential' if conciseness > 0.7 else 'key'} points and main ideas:
41
+
42
+ "{{text}}"
43
+
44
+ {'CONDENSED' if conciseness > 0.7 else 'COMPREHENSIVE'} SUMMARY:"""
45
+
46
  map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])
 
 
 
 
 
47
  combine_prompt = PromptTemplate(template=combine_prompt_template, input_variables=["text"])
48
+
49
+ # Adjust the chain type based on the document length and conciseness
50
  total_length = sum(len(chunk.page_content) for chunk in chunks)
51
+ if total_length < 10000 or conciseness > 0.8:
 
52
  chain = load_summarize_chain(
53
+ llm,
54
+ chain_type="stuff",
55
  prompt=combine_prompt
56
  )
57
+ else:
58
  chain = load_summarize_chain(
59
  llm,
60
  chain_type="map_reduce",
 
62
  combine_prompt=combine_prompt,
63
  verbose=True
64
  )
65
+
66
  summary = chain.run(chunks)
67
  return summary
68
 
69
+ def summarize_content(pdf_file, text_input, conciseness):
70
  if pdf_file is None and not text_input:
71
  return "Please upload a PDF file or enter text to summarize."
72
+
73
  if pdf_file is not None:
74
  # Extract text from PDF
75
  text = extract_text_from_pdf(pdf_file)
76
  else:
77
  # Use the input text
78
  text = text_input
79
+
80
  # Chunk the text
81
  chunks = chunk_text(text)
82
+
83
+ # Summarize chunks with conciseness level
84
+ final_summary = summarize_chunks(chunks, conciseness)
85
  return final_summary
86
 
87
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
88
  gr.Markdown(
89
+ """
90
+ # PDF And Text Summarizer
91
+ ### Advanced PDF and Text Summarization with Conciseness Control
92
+ - Upload your PDF document or enter text directly, adjust the conciseness level, and let AI generate a summary.
93
+ """
 
94
  )
95
+
96
  with gr.Row():
97
  with gr.Column(scale=1):
98
  input_pdf = gr.File(label="Upload PDF (optional)", file_types=[".pdf"])
99
  input_text = gr.Textbox(label="Or enter text here", lines=5, placeholder="Paste or type your text here...")
100
+ conciseness_slider = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Conciseness Level")
101
  submit_btn = gr.Button("Generate Summary", variant="primary")
102
+
103
  with gr.Column(scale=2):
104
  output = gr.Textbox(label="Generated Summary", lines=10)
105
+
106
  gr.Markdown(
107
+ """
108
+ ### How it works
109
+ 1. Upload a PDF file or enter text directly
110
+ 2. Adjust the conciseness level:
111
+ - 0 (Most detailed) to 1 (Most concise)
112
+ 3. Click "Generate Summary"
113
+ 4. Wait for the AI to process and summarize your content
114
+ 5. Review the generated summary
115
+
116
+ *Powered by LLAMA 3.1 8B model and LangChain*
117
+ """
118
  )
119
+
120
+ submit_btn.click(summarize_content, inputs=[input_pdf, input_text, conciseness_slider], outputs=output)
121
 
122
  iface.launch()