jk12p committed on
Commit
da4682c
Β·
verified Β·
1 Parent(s): ea4de28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -15
app.py CHANGED
@@ -10,19 +10,19 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
10
  # --- CONFIG ---
11
  HF_TOKEN = os.environ["HF_TOKEN"] # Taken from Hugging Face Space secrets
12
 
13
- # Load tokenizer and model with token
14
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
15
  model = AutoModelForCausalLM.from_pretrained(
16
- "google/gemma-2b-it",
17
  torch_dtype=torch.float16,
18
- device_map="auto",
19
  token=HF_TOKEN
20
  )
21
 
22
  # Load sentence transformer for embeddings
23
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
24
 
25
- st.title("πŸ” RAG App using πŸ€– Gemma 2B")
26
 
27
  uploaded_file = st.file_uploader("πŸ“„ Upload a PDF or TXT file", type=["pdf", "txt"])
28
 
@@ -50,7 +50,7 @@ def create_faiss_index(chunks):
50
  return index, embeddings
51
 
52
  # Retrieve top-k chunks
53
- def retrieve_chunks(query, chunks, index, embeddings, k=5): # increased k
54
  query_embedding = embedder.encode([query])
55
  D, I = index.search(np.array(query_embedding), k)
56
  return [chunks[i] for i in I[0]]
@@ -73,12 +73,14 @@ if uploaded_file:
73
  with st.spinner("Thinking..."):
74
  context = "\n".join(retrieve_chunks(user_question, chunks, index, embeddings))
75
 
76
- # Improved prompt
77
  prompt = (
78
- f"You are an expert assistant. Use the following context to answer the user's question.\n"
79
- f"If the answer (e.g., a name) is mentioned anywhere in the context, extract it precisely.\n"
80
- f"If it's not found, say clearly: 'Name not found.'\n\n"
81
- f"Context:\n{context}\n\nQuestion: {user_question}\nAnswer:"
 
 
82
  )
83
 
84
  input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
@@ -86,14 +88,20 @@ if uploaded_file:
86
  with torch.no_grad():
87
  outputs = model.generate(
88
  input_ids,
89
- max_new_tokens=256, # Using max_new_tokens instead of max_length
90
  num_return_sequences=1,
91
- temperature=0.7,
92
- do_sample=False
 
93
  )
94
 
95
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
96
- answer = generated_text.split("Answer:")[-1].strip()
 
 
 
 
 
97
 
98
  st.markdown("### 🧠 Answer:")
99
  st.success(answer)
 
10
  # --- CONFIG ---
11
  HF_TOKEN = os.environ["HF_TOKEN"] # Taken from Hugging Face Space secrets
12
 
13
+ # Load tokenizer and model (replaced Gemma 2B with Phi-2)
14
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", token=HF_TOKEN)
15
  model = AutoModelForCausalLM.from_pretrained(
16
+ "microsoft/phi-2",
17
  torch_dtype=torch.float16,
18
+ device_map="auto",
19
  token=HF_TOKEN
20
  )
21
 
22
  # Load sentence transformer for embeddings
23
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
24
 
25
+ st.title("πŸ” RAG App using πŸ€– Phi-2")
26
 
27
  uploaded_file = st.file_uploader("πŸ“„ Upload a PDF or TXT file", type=["pdf", "txt"])
28
 
 
50
  return index, embeddings
51
 
52
  # Retrieve top-k chunks
53
def retrieve_chunks(query, chunks, index, embeddings, k=5):
    """Return the k chunks most relevant to *query*.

    Embeds the query with the module-level sentence-transformer
    (``embedder``), runs a nearest-neighbour search against the FAISS
    ``index``, and maps the resulting row indices back into ``chunks``.
    ``embeddings`` is accepted for interface compatibility but is not
    read here — the FAISS index already holds the chunk vectors.
    """
    encoded_query = embedder.encode([query])
    # FAISS returns (distances, indices); only the indices are needed.
    distances, neighbor_ids = index.search(np.array(encoded_query), k)
    return [chunks[pos] for pos in neighbor_ids[0]]
 
73
  with st.spinner("Thinking..."):
74
  context = "\n".join(retrieve_chunks(user_question, chunks, index, embeddings))
75
 
76
+ # Updated prompt for Phi-2's instruction style
77
  prompt = (
78
+ f"Instruction: Answer the following question using only the context provided. "
79
+ f"Extract specific information directly from the context when available. "
80
+ f"If the answer is not in the context, respond with 'Information not found.'\n\n"
81
+ f"Context:\n{context}\n\n"
82
+ f"Question: {user_question}\n\n"
83
+ f"Answer: "
84
  )
85
 
86
  input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
 
88
  with torch.no_grad():
89
  outputs = model.generate(
90
  input_ids,
91
+ max_new_tokens=256, # Keep using max_new_tokens as fixed before
92
  num_return_sequences=1,
93
+ temperature=0.2, # Lower temperature for more focused answers
94
+ do_sample=True, # Enable sampling for more natural responses
95
+ top_p=0.9, # Add top_p sampling for better quality
96
  )
97
 
98
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
99
+
100
+ # Extract the answer part - adapt based on Phi-2's output format
101
+ if "Answer:" in generated_text:
102
+ answer = generated_text.split("Answer:")[-1].strip()
103
+ else:
104
+ answer = generated_text.replace(prompt, "").strip()
105
 
106
  st.markdown("### 🧠 Answer:")
107
  st.success(answer)