Spaces:

achraf2203
/

RAG-Chatbot

Sleeping

App Files Files Community

mohamedachraf committed on Aug 4, 2025

Commit

b88f075

1 Parent(s): aa52fc9

modify the pipeline

Browse files

Files changed (1) hide show

app.py +16 -54

app.py CHANGED Viewed

@@ -31,10 +31,10 @@ import tempfile
 # Prompt template
 template = """Context: {context}
-Question: {query}
 Answer:"""
-QA_PROMPT = PromptTemplate(template=template, input_variables=["query", "context"])
 # Load Phi-2 model from hugging face hub
@@ -143,9 +143,7 @@ def generate(question, answer, text_file, max_new_tokens):
         return
     try:
-        streamer = TextIteratorStreamer(
-            tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
-        )
         phi2_pipeline = pipeline(
             "text-generation",
             tokenizer=tokenizer,
@@ -157,7 +155,6 @@ def generate(question, answer, text_file, max_new_tokens):
             temperature=0.7,
             top_p=0.9,
             repetition_penalty=1.1,
-            streamer=streamer,
         )
         hf_model = HuggingFacePipeline(pipeline=phi2_pipeline)
@@ -169,58 +166,23 @@ def generate(question, answer, text_file, max_new_tokens):
             yield "Your question is too long! Please shorten it."
             return
-        # Run the chain in a separate thread
-        result_container = {"result": None, "error": None}
-        def run_chain():
-            try:
-                result_container["result"] = qa_chain.invoke({"query": query})
-            except Exception as e:
-                result_container["error"] = str(e)
-        thread = Thread(target=run_chain)
-        thread.start()
-        # Stream the response
-        response = ""
         try:
-            for token in streamer:
-                response += token
-                # Clean up the response - stop at natural points
-                cleaned_response = response.strip()
-                # Stop if we hit repetitive patterns
-                words = cleaned_response.split()
-                if len(words) > 10:
-                    # Check for repetitive patterns
-                    last_words = words[-5:]
-                    if len(set(last_words)) <= 2:  # Too much repetition
-                        break
-                # Stop at sentence endings if we have enough content
-                if len(cleaned_response) > 50 and cleaned_response.endswith(('.', '!', '?')):
-                    yield cleaned_response
-                    break
-                yield cleaned_response
         except Exception as e:
-            yield f"Error during streaming: {str(e)}"
             return
-        # Wait for thread to complete
-        thread.join()
-        # Check for errors
-        if result_container["error"]:
-            yield f"Error: {result_container['error']}"
-            return
-        # Final cleanup of response
-        final_response = clean_response(response.strip())
-        # Yield the final cleaned response
-        if final_response != response.strip():
-            yield final_response
     except Exception as e:
         yield f"Error: {str(e)}"

 # Prompt template
 template = """Context: {context}
+Question: {question}
 Answer:"""
+QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
 # Load Phi-2 model from hugging face hub
         return
     try:
+        # Create pipeline without streamer first to test
         phi2_pipeline = pipeline(
             "text-generation",
             tokenizer=tokenizer,
             temperature=0.7,
             top_p=0.9,
             repetition_penalty=1.1,
         )
         hf_model = HuggingFacePipeline(pipeline=phi2_pipeline)
             yield "Your question is too long! Please shorten it."
             return
+        # Get the response directly without streaming first
         try:
+            result = qa_chain.invoke({"query": query})
+            # Extract the answer from the result
+            if isinstance(result, dict):
+                response = result.get('result', str(result))
+            else:
+                response = str(result)
+            # Clean the response
+            cleaned_response = clean_response(response)
+            yield cleaned_response
         except Exception as e:
+            yield f"Error during generation: {str(e)}"
             return
     except Exception as e:
         yield f"Error: {str(e)}"