raksama19 committed on
Commit
31f1c8b
·
verified ·
1 Parent(s): ab5e121

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -21
app.py CHANGED
@@ -120,7 +120,7 @@ class DOLPHIN:
120
  do_sample=False,
121
  num_beams=1,
122
  repetition_penalty=1.1,
123
- temperature=1.0
124
  )
125
 
126
  sequences = self.tokenizer.batch_decode(outputs.sequences, skip_special_tokens=False)
@@ -191,7 +191,7 @@ Provide a descriptive alt text in 1-2 sentences that is informative but not over
191
  max_new_tokens=256,
192
  disable_compile=True,
193
  do_sample=False,
194
- temperature=1.0,
195
  pad_token_id=self.processor.tokenizer.pad_token_id,
196
  eos_token_id=self.processor.tokenizer.eos_token_id
197
  )
@@ -252,7 +252,7 @@ Provide a descriptive alt text in 1-2 sentences that is informative but not over
252
  max_new_tokens=1024,
253
  disable_compile=True,
254
  do_sample=False,
255
- temperature=1.0,
256
  pad_token_id=self.processor.tokenizer.pad_token_id,
257
  eos_token_id=self.processor.tokenizer.eos_token_id
258
  )
@@ -982,31 +982,49 @@ with gr.Blocks(
982
  return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Please process a PDF document first before asking questions."}]
983
 
984
  try:
985
- # Use RAG to get relevant chunks from markdown
986
- if document_chunks and len(document_chunks) > 0:
987
- relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
988
- context = "\n\n".join(relevant_chunks)
989
- # Smart truncation: aim for ~6000 chars for local model
990
- if len(context) > 6000:
991
- # Try to cut at sentence boundaries
992
- sentences = context[:6000].split('.')
993
- context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:6000] + '...'
994
- else:
995
- # Fallback to truncated document if RAG fails
996
- context = processed_markdown[:6000] + "..." if len(processed_markdown) > 6000 else processed_markdown
997
 
998
- # Create prompt for Gemma 3n
999
- prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1000
 
1001
  Context from the document:
1002
  {context}
1003
 
1004
  Question: {message}
1005
 
1006
- Please provide a clear and helpful answer based on the context provided."""
1007
-
1008
- # Generate response using local Gemma 3n
1009
- response_text = gemma_model.chat(prompt)
 
 
 
 
 
1010
  return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response_text}]
1011
 
1012
  except Exception as e:
 
120
  do_sample=False,
121
  num_beams=1,
122
  repetition_penalty=1.1,
123
+ temperature=0.2
124
  )
125
 
126
  sequences = self.tokenizer.batch_decode(outputs.sequences, skip_special_tokens=False)
 
191
  max_new_tokens=256,
192
  disable_compile=True,
193
  do_sample=False,
194
+ temperature=0.2,
195
  pad_token_id=self.processor.tokenizer.pad_token_id,
196
  eos_token_id=self.processor.tokenizer.eos_token_id
197
  )
 
252
  max_new_tokens=1024,
253
  disable_compile=True,
254
  do_sample=False,
255
+ temperature=0.2,
256
  pad_token_id=self.processor.tokenizer.pad_token_id,
257
  eos_token_id=self.processor.tokenizer.eos_token_id
258
  )
 
982
  return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Please process a PDF document first before asking questions."}]
983
 
984
  try:
985
+ # Check if it's a simple greeting or conversational message
986
+ greeting_words = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening', 'thanks', 'thank you']
987
+ is_greeting = any(greeting.lower() in message.lower() for greeting in greeting_words)
 
 
 
 
 
 
 
 
 
988
 
989
+ if is_greeting and len(message.split()) <= 3:
990
+ # Handle simple greetings without RAG
991
+ if 'hi' in message.lower() or 'hello' in message.lower() or 'hey' in message.lower():
992
+ response_text = "Hello! I'm here to help you with questions about your processed document. What would you like to know?"
993
+ elif 'thank' in message.lower():
994
+ response_text = "You're welcome! Feel free to ask me anything about the document."
995
+ else:
996
+ response_text = "Hello! How can I help you understand the document better?"
997
+ else:
998
+ # Use RAG for document-related questions
999
+ if document_chunks and len(document_chunks) > 0:
1000
+ relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
1001
+ context = "\n\n".join(relevant_chunks)
1002
+ # Smart truncation: aim for ~6000 chars for local model
1003
+ if len(context) > 6000:
1004
+ # Try to cut at sentence boundaries
1005
+ sentences = context[:6000].split('.')
1006
+ context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:6000] + '...'
1007
+ else:
1008
+ # Fallback to truncated document if RAG fails
1009
+ context = processed_markdown[:6000] + "..." if len(processed_markdown) > 6000 else processed_markdown
1010
+
1011
+ # Create prompt for Gemma 3n
1012
+ prompt = f"""You are a helpful assistant that answers questions about documents. Answer concisely and directly based on the provided context. If the context doesn't contain relevant information, say so briefly and offer to help with other questions about the document.
1013
 
1014
  Context from the document:
1015
  {context}
1016
 
1017
  Question: {message}
1018
 
1019
+ Answer:"""
1020
+
1021
+ # Generate response using local Gemma 3n
1022
+ response_text = gemma_model.chat(prompt)
1023
+
1024
+ # Clean up repetitive text and Korean characters
1025
+ response_text = response_text.split('답변:')[0].strip() # Remove Korean repetitions
1026
+ response_text = response_text.split('Answer:')[-1].strip() # Clean prompt artifacts
1027
+
1028
  return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response_text}]
1029
 
1030
  except Exception as e: