Commit 7770bcb
Parent(s): e0f03d9

Changed the model to mixtral-8x7b-32768 for groq

Files changed: backend/app.py (+29 -16)
backend/app.py CHANGED
@@ -276,21 +276,6 @@ def ask_question_endpoint():
     with open(metadata_path, "rb") as f: metadata = pickle.load(f)
     chunks, chunk_metadata = metadata["chunks"], metadata["chunk_metadata"]
 
-    # --- 2. Retrieve Context ---
-    query_vec = get_embedding_model().encode([query], show_progress_bar=False)
-    distances, indices = index.search(np.array(query_vec), k=3)
-    context = "\n".join([chunks[i] for i in indices[0]])
-    sources = list(set([chunk_metadata[i]['title'] for i in indices[0] if i < len(chunk_metadata)]))
-
-    # --- 3. Generate Prompt ---
-    prompt_template = (
-        "Using ONLY the information from the following context, answer the question. "
-        "Do not mention the context in your answer. Be concise.\n\n"
-        "Context:\n{context}\n\n"
-        "Question: {query}\n\nAnswer:"
-    )
-    prompt = prompt_template.format(context=context, query=query)
-
     # --- 4. Generate Response (with API/Local Fallback) ---
     response = ""
     use_api = GROQ_API_KEY and GROQ_API_KEY != "your_groq_api_key_here"
@@ -299,11 +284,25 @@ def ask_question_endpoint():
         # --- Primary Method: Try Groq API ---
         global api_llm
         if api_llm is None: api_llm = Groq(api_key=GROQ_API_KEY)
+
+        # Retrieve top 3 chunks for the powerful API model
+        query_vec = get_embedding_model().encode([query], show_progress_bar=False)
+        distances, indices = index.search(np.array(query_vec), k=3)
+        context = "\n".join([chunks[i] for i in indices[0]])
+        sources = list(set([chunk_metadata[i]['title'] for i in indices[0] if i < len(chunk_metadata)]))
+
+        prompt_template = ("Using ONLY the information from the following context, answer the question. "
+                           "Do not mention the context in your answer. Be concise.\n\n"
+                           "Context:\n{context}\n\nQuestion: {query}\n\nAnswer:")
+        prompt = prompt_template.format(context=context, query=query)
+
         logging.info("Attempting to generate response with Groq API...")
         try:
             chat_completion = api_llm.chat.completions.create(
                 messages=[{"role": "user", "content": prompt}],
-
+                # Use a stable, powerful model like Mixtral.
+                # Other options include 'gemma-7b-it'.
+                model="mixtral-8x7b-32768",
                 temperature=0.5, max_tokens=250
             )
             response = chat_completion.choices[0].message.content
@@ -314,10 +313,24 @@ def ask_question_endpoint():
 
     if not use_api:
         # --- Fallback Method: Use Local Model ---
+        # The local model has a small context window, so we only use the single most relevant chunk (k=1).
+        query_vec = get_embedding_model().encode([query], show_progress_bar=False)
+        distances, indices = index.search(np.array(query_vec), k=1)
+        context = "\n".join([chunks[i] for i in indices[0]])
+        sources = list(set([chunk_metadata[i]['title'] for i in indices[0] if i < len(chunk_metadata)]))
+
+        prompt_template = ("Using ONLY the information from the following context, answer the question. "
+                           "Do not mention the context in your answer. Be concise.\n\n"
+                           "Context:\n{context}\n\nQuestion: {query}\n\nAnswer:")
+        prompt = prompt_template.format(context=context, query=query)
+
         logging.info("Generating response with local model.")
         global local_llm
         if local_llm is None: local_llm = get_local_llm()
         response = local_llm.generate(prompt, max_tokens=250, temp=0.5)
+        # Check if the local model returned an error message instead of an answer
+        if "LLaMA ERROR" in response:
+            raise RuntimeError("The local model failed to generate a response due to context size limitations.")
 
     return jsonify({"answer": response, "sources": sources})
 
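For context outside the diff, here is a minimal, self-contained sketch of the retrieve-then-generate flow the Groq path ends up with after this commit. It assumes a FAISS index and chunk list loaded elsewhere, and uses all-MiniLM-L6-v2 as a stand-in for the app's get_embedding_model(); the function and variable names below are illustrative, not the app's exact code.

# Minimal sketch (assumptions noted inline), mirroring the Groq path this commit sets up.
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq

def answer_with_groq(query, index, chunks, api_key, k=3):
    # Embed the query and fetch the k nearest chunks from the FAISS index.
    # Assumption: the index was built with the same embedding model.
    embedder = SentenceTransformer("all-MiniLM-L6-v2")  # stand-in; the app's get_embedding_model() may differ
    query_vec = embedder.encode([query], show_progress_bar=False)
    _, indices = index.search(np.array(query_vec), k=k)
    context = "\n".join(chunks[i] for i in indices[0])

    # Same prompt shape as the endpoint: answer from the context only, concisely.
    prompt = (
        "Using ONLY the information from the following context, answer the question. "
        "Do not mention the context in your answer. Be concise.\n\n"
        f"Context:\n{context}\n\nQuestion: {query}\n\nAnswer:"
    )

    client = Groq(api_key=api_key)
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="mixtral-8x7b-32768",  # the model this commit switches to
        temperature=0.5,
        max_tokens=250,
    )
    return chat_completion.choices[0].message.content

The local fallback in the diff follows the same shape but retrieves only the single best chunk (k=1), since the local model's context window is much smaller than Mixtral's 32k tokens.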
|