Update app.py
Browse files
app.py
CHANGED
|
@@ -1,130 +1,27 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import os
|
| 3 |
-
import json
|
| 4 |
-
import requests
|
| 5 |
-
from langchain_community.document_loaders import PyMuPDFLoader
|
| 6 |
-
from openai import OpenAI
|
| 7 |
-
import tiktoken
|
| 8 |
-
import pandas as pd
|
| 9 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 10 |
-
from langchain_community.embeddings.openai import OpenAIEmbeddings
|
| 11 |
-
from langchain_community.vectorstores import Chroma
|
| 12 |
import tempfile
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# API credentials come from the environment — never hard-coded.
# NOTE(review): the variables are named OPENAI_* but the env vars read are the
# generic "API_KEY"/"API_BASE" — confirm the deployment sets those exact names.
OPENAI_API_KEY = os.environ.get("API_KEY")
OPENAI_API_BASE = os.environ.get("API_BASE")

# Initialize OpenAI client
# base_url is overridden so this also works against OpenAI-compatible proxies.
client = OpenAI(
    api_key=OPENAI_API_KEY,
    base_url=OPENAI_API_BASE
)

# Define the system prompt for the model.
# The RAG answer path sends this verbatim as the "system" message; it pins the
# model to the retrieved ###Context and defines the refusal phrase used when
# the context cannot answer the question.
qna_system_message = """
You are an AI assistant designed to support professional doctors at St. Bernard's Medical Center. Your task is to provide evidence-based, concise, and relevant medical information to doctors' clinical questions based on the context provided.
User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context. The context contains references to specific portions of trusted medical literature and research articles relevant to the query, along with their source details.
When crafting your response:
1. Use only the provided context to answer the question.
2. If the answer is found in the context, respond with concise and actionable medical insights.
3. Include the source reference with the page number, journal name, or publication, as provided in the context.
4. If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."
Please adhere to the following response guidelines:
- Provide clear, direct answers using only the given context.
- Do not include any additional information outside of the context.
- Avoid rephrasing or summarizing the context unless explicitly relevant to the question.
- If no relevant answer exists in the context, respond with: "Sorry, this is out of my knowledge base."
- If the context is not provided, your response should also be: "Sorry, this is out of my knowledge base."
Here is an example of how to structure your response:
Answer:
[Answer based on context]
Source:
[Source details with page or section]
"""

# Define the user message template.
# {context} / {question} are filled via str.replace (not str.format) elsewhere,
# so literal braces in retrieved text cannot break the substitution.
# NOTE(review): the example text mentions "GEN AI Research Paper" while the
# system prompt targets medical literature — likely a leftover from a template.
qna_user_message_template = """
###Context
Here are some excerpts from GEN AI Research Paper and their sources that are relevant to the Gen AI question mentioned below:
{context}
###Question
{question}
"""
| 54 |
|
| 55 |
@st.cache_resource
|
| 56 |
-
def
|
| 57 |
-
|
| 58 |
for uploaded_file in uploaded_files:
|
|
|
|
| 59 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
|
| 60 |
tmp_file.write(uploaded_file.getvalue())
|
| 61 |
tmp_file_path = tmp_file.name
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
| 67 |
-
encoding_name='cl100k_base',
|
| 68 |
-
chunk_size=1000,
|
| 69 |
-
)
|
| 70 |
-
document_chunks = text_splitter.split_documents(all_documents)
|
| 71 |
-
|
| 72 |
-
embedding_model = OpenAIEmbeddings(
|
| 73 |
-
openai_api_key=OPENAI_API_KEY,
|
| 74 |
-
openai_api_base=OPENAI_API_BASE
|
| 75 |
-
)
|
| 76 |
|
| 77 |
-
|
| 78 |
-
vectorstore = Chroma.from_documents(
|
| 79 |
-
document_chunks,
|
| 80 |
-
embedding_model
|
| 81 |
-
)
|
| 82 |
-
return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 5})
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
context_list = [d.page_content for d in relevant_document_chunks]
|
| 88 |
-
|
| 89 |
-
# Combine document chunks into a single context
|
| 90 |
-
context_for_query = ". ".join(context_list)
|
| 91 |
-
|
| 92 |
-
user_message = qna_user_message_template.replace('{context}', context_for_query)
|
| 93 |
-
user_message = user_message.replace('{question}', user_input)
|
| 94 |
-
|
| 95 |
-
# Generate the response
|
| 96 |
-
try:
|
| 97 |
-
response = client.chat.completions.create(
|
| 98 |
-
model="gpt-4o-mini",
|
| 99 |
-
messages=[
|
| 100 |
-
{"role": "system", "content": qna_system_message},
|
| 101 |
-
{"role": "user", "content": user_message}
|
| 102 |
-
],
|
| 103 |
-
max_tokens=max_tokens,
|
| 104 |
-
temperature=temperature,
|
| 105 |
-
top_p=top_p
|
| 106 |
-
)
|
| 107 |
-
response = response.choices[0].message.content.strip()
|
| 108 |
-
except Exception as e:
|
| 109 |
-
response = f'Sorry, I encountered the following error: \n {e}'
|
| 110 |
-
|
| 111 |
-
return response
|
| 112 |
-
|
| 113 |
-
# Streamlit App
|
| 114 |
-
st.title("LLM-Powered Research Assistant")
|
| 115 |
-
|
| 116 |
-
uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
|
| 117 |
|
| 118 |
-
retriever = None
|
| 119 |
if uploaded_files:
|
| 120 |
st.info("Processing uploaded PDFs...")
|
| 121 |
-
|
| 122 |
-
st.success("
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
if retriever:
|
| 126 |
-
user_question = st.text_input("Ask a question about the uploaded documents:")
|
| 127 |
-
if user_question:
|
| 128 |
-
with st.spinner("Generating response..."):
|
| 129 |
-
rag_response = generate_rag_response(user_question, retriever)
|
| 130 |
-
st.write(rag_response)
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import tempfile
|
| 3 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
@st.cache_resource
def process_pdfs(uploaded_files):
    """Write each uploaded PDF to a temporary file and return the file names.

    Each upload is written to a ``NamedTemporaryFile`` (``delete=False`` so
    the path survives the ``with`` block), then the temp file is removed
    immediately; only the original upload names are collected.

    Args:
        uploaded_files: iterable of Streamlit ``UploadedFile`` objects.

    Returns:
        list[str]: the ``.name`` of every uploaded file, in upload order.
    """
    # NOTE(review): @st.cache_resource keys on the argument; confirm a list of
    # UploadedFile objects is hashable/stable enough for the intended caching.
    file_names = []
    for uploaded_file in uploaded_files:
        # Save uploaded file to a temp file; delete=False keeps it on disk
        # past the `with` so a path-based consumer could read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        try:
            file_names.append(uploaded_file.name)
        finally:
            # Clean up immediately — guaranteed even if the bookkeeping above
            # raises, so orphaned temp files cannot accumulate (the original
            # leaked the file on any exception before os.remove).
            os.remove(tmp_file_path)
    return file_names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
# --- Page layout ------------------------------------------------------------
st.title("PDF Upload Test")

uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

# Run the processing pass only once at least one file has been uploaded;
# an empty upload widget is falsy, so nothing below executes until then.
if uploaded_files:
    st.info("Processing uploaded PDFs...")
    file_names = process_pdfs(uploaded_files)
    uploaded_count = len(file_names)
    st.success(f"Uploaded {uploaded_count} file(s): {file_names}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|