Commit 3f965c2 (verified) by raksama19 · Parent(s): 31f1c8b

Update app.py

Files changed (1):
  1. app.py +25 -47

app.py CHANGED
@@ -120,7 +120,7 @@ class DOLPHIN:
             do_sample=False,
             num_beams=1,
             repetition_penalty=1.1,
-            temperature=0.2
+            temperature=1.0
         )
 
         sequences = self.tokenizer.batch_decode(outputs.sequences, skip_special_tokens=False)
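Worth noting about this hunk: with `do_sample=False` and `num_beams=1`, `generate()` decodes greedily and never applies `temperature`, but recent transformers releases warn when a non-default temperature is passed alongside greedy decoding, so moving it to the 1.0 default silences that warning without changing output. A minimal sketch of the distinction, using a placeholder `gpt2` checkpoint rather than the app's model:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder, not the app's checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("Parse this document:", return_tensors="pt")

# Greedy decoding: temperature is never applied, so leave it at its 1.0 default
# (recent transformers versions warn if a non-default value is passed here).
greedy = model.generate(**inputs, do_sample=False, num_beams=1,
                        repetition_penalty=1.1, max_new_tokens=32)

# Only with sampling enabled does temperature reshape the output distribution.
sampled = model.generate(**inputs, do_sample=True, temperature=0.7, max_new_tokens=32)
print(tokenizer.decode(greedy[0], skip_special_tokens=True))
```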
@@ -185,15 +185,13 @@ Provide a descriptive alt text in 1-2 sentences that is informative but not over
         )
         input_len = input_ids["input_ids"].shape[-1]
 
-        input_ids = input_ids.to(self.model.device)
+        input_ids = input_ids.to(self.model.device, dtype=self.model.dtype)
         outputs = self.model.generate(
             **input_ids,
             max_new_tokens=256,
             disable_compile=True,
             do_sample=False,
-            temperature=0.2,
-            pad_token_id=self.processor.tokenizer.pad_token_id,
-            eos_token_id=self.processor.tokenizer.eos_token_id
+            temperature=0.1
         )
 
         text = self.processor.batch_decode(
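A note on the `.to(self.model.device, dtype=self.model.dtype)` change: assuming the processor returns a transformers `BatchFeature` (multimodal processors do), its `.to()` casts only floating-point tensors to the requested dtype and merely moves integer tensors, so `input_ids` stay integer while `pixel_values` end up in the model's dtype. A small sketch of that behavior:

```python
import torch
from transformers import BatchFeature

batch = BatchFeature({
    "input_ids": torch.tensor([[1, 2, 3]]),       # integer token ids
    "pixel_values": torch.randn(1, 3, 224, 224),  # floating-point image tensor
})

moved = batch.to("cpu", dtype=torch.bfloat16)
print(moved["input_ids"].dtype)     # torch.int64: ids are moved, not cast
print(moved["pixel_values"].dtype)  # torch.bfloat16: floats follow the model dtype
```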
@@ -246,15 +244,13 @@ Provide a descriptive alt text in 1-2 sentences that is informative but not over
         )
         input_len = input_ids["input_ids"].shape[-1]
 
-        input_ids = input_ids.to(self.model.device)
+        input_ids = input_ids.to(self.model.device, dtype=self.model.dtype)
         outputs = self.model.generate(
             **input_ids,
             max_new_tokens=1024,
             disable_compile=True,
-            do_sample=False,
-            temperature=0.2,
-            pad_token_id=self.processor.tokenizer.pad_token_id,
-            eos_token_id=self.processor.tokenizer.eos_token_id
+            do_sample=True,
+            temperature=0.7
        )
 
         text = self.processor.batch_decode(
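Since this hunk turns sampling on (`do_sample=True, temperature=0.7`), the generated text will differ from run to run. If repeatable output matters (for example when regression-testing descriptions), transformers' `set_seed` pins the Python, NumPy and torch RNGs; a sketch with a placeholder model:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder, not the app's model
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("Describe the table:", return_tensors="pt")

set_seed(42)  # seeds python, numpy and torch in one call
out_a = model.generate(**inputs, do_sample=True, temperature=0.7, max_new_tokens=16)
set_seed(42)
out_b = model.generate(**inputs, do_sample=True, temperature=0.7, max_new_tokens=16)
assert torch.equal(out_a, out_b)  # identical seeds give identical samples
```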
@@ -690,7 +686,7 @@ def create_embeddings(chunks):
 def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
     """Retrieve most relevant chunks for a question"""
     if embedding_model is None or embeddings is None:
-        return chunks[:3]  # Fallback to first 3 chunks
+        return chunks[:3]  # Fallback to first 3 chunks
 
     try:
         question_embedding = embedding_model.encode([question], show_progress_bar=False)
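The body of the `try` block sits outside this diff, so as a reading aid, here is a hedged sketch of what a retrieval step with this signature typically does. The cosine-similarity math and the `all-MiniLM-L6-v2` model name are assumptions for illustration, not code from the app:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model

def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
    """Sketch: rank chunks by cosine similarity to the question embedding."""
    if embedding_model is None or embeddings is None:
        return chunks[:3]  # fallback mirrors the diff
    q = embedding_model.encode([question], show_progress_bar=False)[0]
    # cosine similarity of the question against each precomputed chunk embedding
    sims = embeddings @ q / (np.linalg.norm(embeddings, axis=1) * np.linalg.norm(q) + 1e-10)
    top = np.argsort(sims)[::-1][:top_k]
    return [chunks[i] for i in top]
```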
@@ -982,49 +978,31 @@ with gr.Blocks(
             return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Please process a PDF document first before asking questions."}]
 
         try:
-            # Check if it's a simple greeting or conversational message
-            greeting_words = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening', 'thanks', 'thank you']
-            is_greeting = any(greeting.lower() in message.lower() for greeting in greeting_words)
-
-            if is_greeting and len(message.split()) <= 3:
-                # Handle simple greetings without RAG
-                if 'hi' in message.lower() or 'hello' in message.lower() or 'hey' in message.lower():
-                    response_text = "Hello! I'm here to help you with questions about your processed document. What would you like to know?"
-                elif 'thank' in message.lower():
-                    response_text = "You're welcome! Feel free to ask me anything about the document."
-                else:
-                    response_text = "Hello! How can I help you understand the document better?"
+            # Use RAG to get relevant chunks from markdown
+            if document_chunks and len(document_chunks) > 0:
+                relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
+                context = "\n\n".join(relevant_chunks)
+                # Smart truncation: aim for ~6000 chars for local model
+                if len(context) > 6000:
+                    # Try to cut at sentence boundaries
+                    sentences = context[:6000].split('.')
+                    context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:6000] + '...'
             else:
-                # Use RAG for document-related questions
-                if document_chunks and len(document_chunks) > 0:
-                    relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
-                    context = "\n\n".join(relevant_chunks)
-                    # Smart truncation: aim for ~6000 chars for local model
-                    if len(context) > 6000:
-                        # Try to cut at sentence boundaries
-                        sentences = context[:6000].split('.')
-                        context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:6000] + '...'
-                else:
-                    # Fallback to truncated document if RAG fails
-                    context = processed_markdown[:6000] + "..." if len(processed_markdown) > 6000 else processed_markdown
-
-                # Create prompt for Gemma 3n
-                prompt = f"""You are a helpful assistant that answers questions about documents. Answer concisely and directly based on the provided context. If the context doesn't contain relevant information, say so briefly and offer to help with other questions about the document.
+                # Fallback to truncated document if RAG fails
+                context = processed_markdown[:6000] + "..." if len(processed_markdown) > 6000 else processed_markdown
+
+            # Create prompt for Gemma 3n
+            prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
 
 Context from the document:
 {context}
 
 Question: {message}
 
-Answer:"""
-
-                # Generate response using local Gemma 3n
-                response_text = gemma_model.chat(prompt)
-
-                # Clean up repetitive text and Korean characters
-                response_text = response_text.split('답변:')[0].strip()  # Remove Korean repetitions
-                response_text = response_text.split('Answer:')[-1].strip()  # Clean prompt artifacts
-
+Please provide a clear and helpful answer based on the context provided."""
+
+            # Generate response using local Gemma 3n
+            response_text = gemma_model.chat(prompt)
+
             return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response_text}]
 
         except Exception as e:
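The "smart truncation" one-liner kept in this hunk leans on Python's conditional-expression precedence: `a + b if cond else c` parses as `(a + b) if cond else c`, so it is correct as written. A stand-alone equivalent (the helper name is hypothetical, not from the app) that makes the intent explicit:

```python
def truncate_at_sentence(text, limit=6000):
    """Equivalent of the hunk's one-liner: cut at the last complete sentence."""
    if len(text) <= limit:
        return text
    sentences = text[:limit].split('.')
    if len(sentences) > 1:
        return '.'.join(sentences[:-1]) + '...'  # drop the sentence the cut landed in
    return text[:limit] + '...'                  # no period found; hard cut

print(truncate_at_sentence("one. two. three", limit=9))  # -> "one. two..."
```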
 