Rom89823974978 committed on
Commit
dff1399
·
1 Parent(s): 2b24b35
backend/main.py CHANGED
@@ -24,7 +24,7 @@ from langchain.chains import ConversationalRetrievalChain
24
  from langchain.prompts import PromptTemplate
25
  from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
26
 
27
- from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, AutoModelForSeq2SeqLM
28
  from sentence_transformers import CrossEncoder
29
 
30
  from whoosh import index
@@ -55,12 +55,13 @@ class Settings(BaseSettings):
55
  vectorstore_path: str = "gs://mda_eu_project/vectorstore_index"
56
  # Models
57
  embedding_model: str = "sentence-transformers/LaBSE"
58
- llm_model: str = "meta-llama/Llama-3.2-1B-Instruct"#"meta-llama/Llama-3.2-3B-Instruct"#"google/flan-t5-base"#"google/mt5-base"#"bigscience/bloomz-560m"#"bigscience/bloom-1b7"#"google/mt5-small"#"bigscience/bloom-3b"#"RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
 
59
  cross_encoder_model: str = "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1"
60
  # RAG parameters
61
  chunk_size: int = 750
62
  chunk_overlap: int = 100
63
- hybrid_k: int = 4
64
  assistant_role: str = (
65
  "You are a knowledgeable project analyst. You have access to the following retrieved document snippets (with Project IDs in [brackets])"
66
  )
@@ -644,24 +645,39 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
644
  # Seq2seq pipeline
645
  logger.info("Initializing Pipeline")
646
  #full_model=AutoModelForSeq2SeqLM.from_pretrained(settings.llm_model)
647
- full_model = AutoModelForCausalLM.from_pretrained(settings.llm_model)#, device_map="auto")
648
 
649
  # Apply dynamic quantization to all Linear layers
650
- llm_model = torch.quantization.quantize_dynamic(
651
- full_model,
652
- {torch.nn.Linear},
653
- dtype=torch.qint8
654
- )
655
  # Create your text-generation pipeline on CPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
  gen_pipe = pipeline(
657
- "text-generation",#"text2text-generation",##"text2text-generation",
658
- model=llm_model,
659
- tokenizer=AutoTokenizer.from_pretrained(settings.llm_model),
660
- device=-1, # CPU
661
  max_new_tokens=256,
662
  do_sample=True,
663
  temperature=0.7,
664
- #device_map="auto"
665
  )
666
  # Wrap in LangChain's HuggingFacePipeline
667
  llm = HuggingFacePipeline(pipeline=gen_pipe)
@@ -688,19 +704,24 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
688
  logger.info("Initializing Hybrid Retriever")
689
  retriever = HybridRetriever(vs=vs, ix=ix, compressor=compressor, cross_encoder=cross_encoder)
690
 
691
- prompt = PromptTemplate.from_template(
692
- f"{settings.assistant_role}\n\n"
693
- "{context}\n"
694
- "Now answer the user's question thoroughly:"
695
- "Question: {question}\n"
696
- "Your answer should: \n"
697
- "1. Be at least **4-6 sentences** long \n"
698
- "2. Explain concepts clearly in full sentences \n"
699
- "3. Cite any document you draw on by including its ID in [brackets] inline \n"
700
- "4. Provide any high-level conclusions or recommendations at the end \n"
701
-
702
- "Begin your answer below:"
703
- )
 
 
 
 
 
704
 
705
  logger.info("Initializing Retrieval Chain")
706
  app.state.rag_chain = ConversationalRetrievalChain.from_llm(
 
24
  from langchain.prompts import PromptTemplate
25
  from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
26
 
27
+ from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, AutoModelForSeq2SeqLM, T5Tokenizer,T5ForConditionalGeneration
28
  from sentence_transformers import CrossEncoder
29
 
30
  from whoosh import index
 
55
  vectorstore_path: str = "gs://mda_eu_project/vectorstore_index"
56
  # Models
57
  embedding_model: str = "sentence-transformers/LaBSE"
58
+ llm_model: str = "google/flan-t5-base"
59
+ #"google/mt5-base"#"meta-llama/Llama-3.2-1B-Instruct"#"meta-llama/Llama-3.2-3B-Instruct"#"google/flan-t5-base"#"google/mt5-base"#"bigscience/bloomz-560m"#"bigscience/bloom-1b7"#"google/mt5-small"#"bigscience/bloom-3b"#"RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
60
  cross_encoder_model: str = "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1"
61
  # RAG parameters
62
  chunk_size: int = 750
63
  chunk_overlap: int = 100
64
+ hybrid_k: int = 2
65
  assistant_role: str = (
66
  "You are a knowledgeable project analyst. You have access to the following retrieved document snippets (with Project IDs in [brackets])"
67
  )
 
645
  # Seq2seq pipeline
646
  logger.info("Initializing Pipeline")
647
  #full_model=AutoModelForSeq2SeqLM.from_pretrained(settings.llm_model)
648
+ #full_model = AutoModelForCausalLM.from_pretrained(settings.llm_model)#, device_map="auto")
649
 
650
  # Apply dynamic quantization to all Linear layers
651
+ #llm_model = torch.quantization.quantize_dynamic(
652
+ # full_model,
653
+ # {torch.nn.Linear},
654
+ # dtype=torch.qint8
655
+ #)
656
  # Create your text-generation pipeline on CPU
657
+ #gen_pipe = pipeline(
658
+ # "text-generation",#"text2text-generation",##"text2text-generation",
659
+ # model=llm_model,
660
+ # tokenizer=AutoTokenizer.from_pretrained(settings.llm_model),
661
+ # device=-1, # CPU
662
+ # max_new_tokens=256,
663
+ # do_sample=True,
664
+ # temperature=0.7,
665
+ # #device_map="auto"
666
+ #)
667
+ tokenizer = T5Tokenizer.from_pretrained(settings.llm_model)
668
+ model = T5ForConditionalGeneration.from_pretrained(settings.llm_model)
669
+ model = torch.quantization.quantize_dynamic(
670
+ model, {torch.nn.Linear}, dtype=torch.qint8
671
+ )
672
+
673
  gen_pipe = pipeline(
674
+ "text2text-generation",
675
+ model=model,
676
+ tokenizer=tokenizer,
677
+ device=-1,
678
  max_new_tokens=256,
679
  do_sample=True,
680
  temperature=0.7,
 
681
  )
682
  # Wrap in LangChain's HuggingFacePipeline
683
  llm = HuggingFacePipeline(pipeline=gen_pipe)
 
704
  logger.info("Initializing Hybrid Retriever")
705
  retriever = HybridRetriever(vs=vs, ix=ix, compressor=compressor, cross_encoder=cross_encoder)
706
 
707
+ prompt = PromptTemplate.from_template("""
708
+ {assistant_role}
709
+
710
+ You have the following retrieved document snippets (with Project IDs in [brackets]):
711
+
712
+ {context}
713
+
714
+ User Question:
715
+ {question}
716
+
717
+ Please answer thoroughly, following these rules:
718
+ 1. Write at least 4-6 full sentences.
719
+ 2. Use clear, technical language in full sentences.
720
+ 3. Cite any document you reference by including its ID in [brackets] inline.
721
+ 4. Conclude with high-level insights or recommendations.
722
+
723
+ Answer:
724
+ """.strip())
725
 
726
  logger.info("Initializing Retrieval Chain")
727
  app.state.rag_chain = ConversationalRetrievalChain.from_llm(
frontend/src/components/ProjectDetails.tsx CHANGED
@@ -125,8 +125,8 @@ export default function ProjectDetails({
125
  <Text fontWeight="bold">Acronym</Text>
126
  <Text>{project.acronym}</Text>
127
  </Box>
128
- <Box><Text fontWeight="bold">Start Date</Text><Text>{new Date(project.startDate).toISOString().slice(0,10)}</Text></Box>
129
- <Box><Text fontWeight="bold">End Date</Text><Text>{new Date(project.endDate).toISOString().slice(0,10)}</Text></Box>
130
  <Box><Text fontWeight="bold">Funding (EC max)</Text><Text>€{fmtNum(project.ecMaxContribution)}</Text></Box>
131
  <Box><Text fontWeight="bold">Total Cost</Text><Text>€{fmtNum(project.totalCost)}</Text></Box>
132
  <Box><Text fontWeight="bold">Funding Scheme</Text><Text>{project.fundingScheme}</Text></Box>
 
125
  <Text fontWeight="bold">Acronym</Text>
126
  <Text>{project.acronym}</Text>
127
  </Box>
128
+ <Box><Text fontWeight="bold">Start Date</Text><Text>{project.startDate.slice(0, 10)}</Text></Box>
129
+ <Box><Text fontWeight="bold">End Date</Text><Text>{project.endDate.slice(0, 10)}</Text></Box>
130
  <Box><Text fontWeight="bold">Funding (EC max)</Text><Text>€{fmtNum(project.ecMaxContribution)}</Text></Box>
131
  <Box><Text fontWeight="bold">Total Cost</Text><Text>€{fmtNum(project.totalCost)}</Text></Box>
132
  <Box><Text fontWeight="bold">Funding Scheme</Text><Text>{project.fundingScheme}</Text></Box>
frontend/src/components/ProjectExplorer.tsx CHANGED
@@ -233,7 +233,7 @@ const ProjectExplorer: React.FC<ProjectExplorerProps> = ({
233
  <Td w="50%" overflow="hidden" textOverflow="ellipsis">{p.title}</Td>
234
  <Td w="10%">{p.status}</Td>
235
  <Td w="10%">{p.id}</Td>
236
- <Td w="10%" whiteSpace="nowrap">{new Date(p.startDate).toISOString().slice(0,10)}</Td>
237
  <Td w="10%">{p.fundingScheme || '-'}</Td>
238
  <Td w="10%">€{fmtNum(p.ecMaxContribution)}</Td>
239
 
@@ -318,7 +318,7 @@ const ProjectExplorer: React.FC<ProjectExplorerProps> = ({
318
  loadingText="Waiting…"
319
  size="md"
320
  px={6}
321
- py={4}
322
  >
323
  Send
324
  </Button>
 
233
  <Td w="50%" overflow="hidden" textOverflow="ellipsis">{p.title}</Td>
234
  <Td w="10%">{p.status}</Td>
235
  <Td w="10%">{p.id}</Td>
236
+ <Td w="10%" whiteSpace="nowrap">{p.startDate.slice(0, 10)}</Td>
237
  <Td w="10%">{p.fundingScheme || '-'}</Td>
238
  <Td w="10%">€{fmtNum(p.ecMaxContribution)}</Td>
239
 
 
318
  loadingText="Waiting…"
319
  size="md"
320
  px={6}
321
+ py={3}
322
  >
323
  Send
324
  </Button>