Spaces:

Darshan03
/

Triomics-app

Sleeping

App Files Community

Darshan03 committed on Jan 8, 2025

Commit

b71faa5

verified ·

1 Parent(s): 6d41a2c

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -4

app.py CHANGED Viewed

@@ -17,6 +17,10 @@ from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain.chains import create_history_aware_retriever, create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from IPython.display import Markdown, display
 # Define the data folder path
 DATA_FOLDER = "data"
@@ -86,7 +90,8 @@ if uploaded_file is not None:
                 )
                 vector_store.add_documents(documents=docs)
-                llm = ChatGroq(groq_api_key=groq_api, model_name="llama-3.3-70b-versatile")
                 contextualize_q_prompt = ChatPromptTemplate.from_messages(
                     [
@@ -132,10 +137,10 @@ just reformulate it if needed and otherwise return it as is."""),
 **Note:** This prompt emphasizes careful consideration and accurate response based on the provided context.
 """)
-                question_answer_chain = create_stuff_documents_chain(llm, qa_prompt_template)
                 history_aware_retriever = create_history_aware_retriever(
-                    llm,
                     vector_store.as_retriever(
                         search_type="mmr",
                         search_kwargs={'k': 10, 'fetch_k': 50}
@@ -155,7 +160,7 @@ just reformulate it if needed and otherwise return it as is."""),
                 st.session_state.conversational_rag_chain = conversational_rag_chain
                 st.session_state.chat_history_store = chat_history_store
-                st.success("Data processed! You can now ask questions.")
         if "conversational_rag_chain" in st.session_state:
             user_question = st.text_input("Ask a question about the data:", key="user_question")
@@ -168,6 +173,68 @@ just reformulate it if needed and otherwise return it as is."""),
                     )
                     st.markdown(response['answer'])
     except json.JSONDecodeError:
         st.error("Error: The uploaded file is not a valid JSON file.")
     except Exception as e:

 from langchain.chains import create_history_aware_retriever, create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from IPython.display import Markdown, display
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.pydantic_v1 import BaseModel, Field
+from typing import List, Optional
+from dataclasses import dataclass, field
 # Define the data folder path
 DATA_FOLDER = "data"
                 )
                 vector_store.add_documents(documents=docs)
+                llm = ChatGroq(groq_api_key=groq_api, model_name="llama-3.1-8b-instant")
+                st.session_state.llm = llm # Store llm for later use
                 contextualize_q_prompt = ChatPromptTemplate.from_messages(
                     [
 **Note:** This prompt emphasizes careful consideration and accurate response based on the provided context.
 """)
+                question_answer_chain = create_stuff_documents_chain(st.session_state.llm, qa_prompt_template)
                 history_aware_retriever = create_history_aware_retriever(
+                    st.session_state.llm,
                     vector_store.as_retriever(
                         search_type="mmr",
                         search_kwargs={'k': 10, 'fetch_k': 50}
                 st.session_state.conversational_rag_chain = conversational_rag_chain
                 st.session_state.chat_history_store = chat_history_store
+                st.success("Data processed! You can now ask questions and generate structured output.")
         if "conversational_rag_chain" in st.session_state:
             user_question = st.text_input("Ask a question about the data:", key="user_question")
                     )
                     st.markdown(response['answer'])
+        st.subheader("Generate Structured Output")
+        if st.button("Generate Structured Cancer Information"):
+            with st.spinner("Generating structured output..."):
+                json_data = json.loads(Path(file_path).read_text())
+                context = ""
+                for item in json_data:
+                    context += json.dumps(item, indent=4)
+                # Schema classes for the structured-output call that follows.
+                # These are Pydantic models rather than stdlib dataclasses because
+                # `with_structured_output` builds the tool schema (including each
+                # field description) from a Pydantic class and returns an instance
+                # of it, which the later `output.dict()` calls depend on.
+                # The class docstrings and field descriptions are part of the schema
+                # sent to the model, so their wording is intentionally left unchanged.
+                class Stage(BaseModel):
+                    """Cancer Stage information."""
+                    T: str = Field(..., description="T Stage")
+                    N: str = Field(..., description="N Stage")
+                    M: str = Field(..., description="M Stage")
+                    group_stage: str = Field(..., description="Group Stage")
+                class DiagnosisCharacteristic(BaseModel):
+                    """Primary cancer condition details."""
+                    primary_cancer_condition: str = Field(..., description="Primary cancer condition Example “Breast Cancer”, “Lung Cancer”, etc which given in patient data")
+                    diagnosis_date: str = Field(..., description="Earliest date on which the cancer got confirmed Diagnosis date in MM-DD-YYYY format Example:  How to Find: Typically in sentences such as “The biopsy on 01/12/2020 confirmed invasive ductal carcinoma.” or “Pathology Report (02/17/2020): Invasive breast cancer.” c. You may see multiple references to diagnosis across notes; pick the earliest one that specifically confirms the cancer.")
+                    histology: List[str] = Field(..., description="""{Histological classification of the primary cancer condition, Describes the microscopic subtype of the tumor. Common examples: “Adenocarcinoma,” “Invasive ductal carcinoma,” “Squamous cell carcinoma,” etc. b. How to Find: In pathology reports or biopsy results. Terms like “Histologically consistent with adenocarcinoma” or “Invasive ductal carcinoma, Grade 2.”}""")
+                    stage: Stage = Field(..., description="""{Indicates Tumor size/extent. E.g., T2 means a moderate-sized tumor, T4 might mean a larger or invasive tumor. b. N: Indicates lymph Nodes involvement. N0 means no nodal involvement, N1/N2 means progressively more nodes involved. c. M: Indicates Metastasis. M0 means no distant spread; M1 means present. d. Group Stage: A single label (Stage I, Stage IIB, Stage IV, etc.) summarizing T, N, and M combined. e. How to Find: In imaging reports, pathology final reports, or physician notes, e.g. “Stage IIB (T2 N1 M0).” or “pT2 N1 M0.”}""")
+                class CancerRelatedMedication(BaseModel):
+                    """Cancer related medication details."""
+                    medication_name: str = Field(..., description="Medication for cancer:For example, “Doxorubicin,” “Cyclophosphamide,” “Paclitaxel,” “Trastuzumab,” “Pembrolizumab,” “Letrozole,” etc. ")
+                    # The date descriptions explicitly allow a missing/null value, so
+                    # these fields are optional; a null from the model must not fail
+                    # validation of the whole response.
+                    start_date: Optional[str] = Field(None, description="The earliest date this medication was started, in MM-DD-YYYY format, if available. Start date in MM-DD-YYYY format")
+                    end_date: Optional[str] = Field(None, description="The date the medication was stopped, if mentioned. If the patient is still on the medication, you may leave it blank or mark as nullEnd date in MM-DD-YYYY format")
+                    intent: str = Field(..., description="A free-text field describing why the medication was given. Examples: “Adjuvant therapy post-surgery,” “Neoadjuvant therapy to shrink tumor,” “Maintenance therapy for HER2+ disease,” or “Hormonal therapy to block estrogen in ER+ cancer.”")
+                class CancerInformation(BaseModel):
+                    """Structured information about cancer diagnosis and medication."""
+                    diagnosis_characteristics: List[DiagnosisCharacteristic] = Field(..., description="List of primary cancers")
+                    cancer_related_medications: List[CancerRelatedMedication] = Field(..., description="List of cancer related medication given to the patient")
+                structured_llm = st.session_state.llm.with_structured_output(CancerInformation)
+                try:
+                    output = structured_llm.invoke(context)
+                    st.subheader("Generated Structured Output:")
+                    st.json(output.dict())
+                    # Save the generated output to a JSON file
+                    output_filename = f"{Path(file_path).stem}_structured.json"
+                    output_filepath = os.path.join(DATA_FOLDER, output_filename)
+                    with open(output_filepath, "w") as f:
+                        json.dump(output.dict(), f, indent=4)
+                    # Provide a download button
+                    with open(output_filepath, "rb") as f:
+                        st.download_button(
+                            label="Download Generated JSON",
+                            data=f,
+                            file_name=output_filename,
+                            mime="application/json",
+                        )
+                except Exception as e:
+                    st.error(f"Error generating structured output: {e}")
     except json.JSONDecodeError:
         st.error("Error: The uploaded file is not a valid JSON file.")
     except Exception as e: