AzizWazir committed on
Commit
5846262
·
verified ·
1 Parent(s): 2558e6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -9
app.py CHANGED
@@ -10,8 +10,8 @@ import docx
10
  import pandas as pd
11
 
12
  # Initialize the summarization and question-answering models from Hugging Face
13
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
14
- qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2") # Better QA model
15
 
16
  # Sentence Transformer for embedding-based retrieval
17
  embedder = SentenceTransformer('all-MiniLM-L6-v2') # A compact and efficient embedding model
@@ -22,7 +22,6 @@ API_URL = "https://api.groq.com/openai/v1/chat/completions" # Default endpoint
22
 
23
  # PDF Processing Function
24
  def extract_text_from_pdf(pdf_file):
25
- # Read the file from the uploaded BytesIO object
26
  doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
27
  text = ""
28
  for page in doc:
@@ -47,11 +46,8 @@ def extract_text_from_excel(excel_file):
47
 
48
  # FAISS Indexing Function with better embedding-based chunking
49
  def create_faiss_index(text):
50
- # Split the text into paragraphs or logical sections for better context
51
  paragraphs = text.split('\n\n') # Assuming paragraphs are separated by double newlines
52
  embeddings = embedder.encode(paragraphs) # Use sentence-transformers to create embeddings
53
-
54
- # Create a FAISS index
55
  index = faiss.IndexFlatL2(384) # We use 384-dimensional embeddings (from the chosen model)
56
  index.add(np.array(embeddings).astype(np.float32)) # Add the embeddings to the FAISS index
57
  return index, paragraphs
@@ -123,13 +119,12 @@ if uploaded_file:
123
  answer = qa_pipeline(question=query, context=relevant_chunk) # Use question-answering pipeline
124
  st.write(f"Answer: {answer['answer']}")
125
 
126
- # Summarize the relevant chunk
127
  if relevant_chunk.strip():
128
- # Ensure it's long enough for summarization (avoid too short text)
129
  if len(relevant_chunk.split()) > 20: # Only summarize if the text is sufficiently long
130
  try:
131
  st.write("Summarizing...")
132
- summary = summarizer(relevant_chunk, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
133
  st.write(f"Summary: {summary}")
134
  except Exception as e:
135
  st.write(f"Error summarizing text: {str(e)}")
 
10
  import pandas as pd
11
 
12
  # Initialize the summarization and question-answering models from Hugging Face
13
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn", min_length=50, max_length=100) # Concise summary settings
14
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2") # Better QA model for concise answers
15
 
16
  # Sentence Transformer for embedding-based retrieval
17
  embedder = SentenceTransformer('all-MiniLM-L6-v2') # A compact and efficient embedding model
 
22
 
23
  # PDF Processing Function
24
  def extract_text_from_pdf(pdf_file):
 
25
  doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
26
  text = ""
27
  for page in doc:
 
46
 
47
  # FAISS Indexing Function with better embedding-based chunking
48
  def create_faiss_index(text):
 
49
  paragraphs = text.split('\n\n') # Assuming paragraphs are separated by double newlines
50
  embeddings = embedder.encode(paragraphs) # Use sentence-transformers to create embeddings
 
 
51
  index = faiss.IndexFlatL2(384) # We use 384-dimensional embeddings (from the chosen model)
52
  index.add(np.array(embeddings).astype(np.float32)) # Add the embeddings to the FAISS index
53
  return index, paragraphs
 
119
  answer = qa_pipeline(question=query, context=relevant_chunk) # Use question-answering pipeline
120
  st.write(f"Answer: {answer['answer']}")
121
 
122
+ # Summarize the relevant chunk (display summary after answering the query)
123
  if relevant_chunk.strip():
 
124
  if len(relevant_chunk.split()) > 20: # Only summarize if the text is sufficiently long
125
  try:
126
  st.write("Summarizing...")
127
+ summary = summarizer(relevant_chunk, max_length=100, min_length=50, do_sample=False)[0]['summary_text']
128
  st.write(f"Summary: {summary}")
129
  except Exception as e:
130
  st.write(f"Error summarizing text: {str(e)}")