Spaces:

DevBM
/

QGen

Runtime error

App Files Community

DevBM committed on Jul 8, 2024

Commit

dbb2b74

verified ·

1 Parent(s): 26381d0

reversing last update

Browse files

Files changed (1) hide show

app.py +5 -83

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import streamlit as st
 from transformers import T5ForConditionalGeneration, T5Tokenizer
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, LlamaForCausalLM
 import spacy
 import nltk
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -32,8 +31,6 @@ import uuid
 import time
 import asyncio
 import aiohttp
-import torch
-from dotenv import load_dotenv
 print("***************************************************************")
 st.set_page_config(
@@ -47,8 +44,6 @@ st.set_page_config(
 st.set_option('deprecation.showPyplotGlobalUse',False)
-HF_TOKEN = st.secrets['HF_TOKEN']
 class QuestionGenerationError(Exception):
     """Custom exception for question generation errors."""
     pass
@@ -90,7 +85,7 @@ def load_model(modelname):
 # Load Spacy Model
 @st.cache_resource
 def load_nlp_models():
-    nlp = spacy.load("en_core_web_lg")
     s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
     return nlp, s2v
@@ -103,13 +98,6 @@ def load_qa_models():
     spell = SpellChecker()
     return similarity_model, spell
-@st.cache_resource
-def load_llm_model():
-    model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = LlamaForCausalLM.from_pretrained(model_name,torch_dtype=torch.float16, device_map="auto")
-    return tokenizer, model
 with st.sidebar:
     select_model = st.selectbox("Select Model", ("T5-large","T5-small"))
 if select_model == "T5-large":
@@ -121,10 +109,6 @@ similarity_model, spell = load_qa_models()
 context_model = similarity_model
 sentence_model = similarity_model
 model, tokenizer = load_model(modelname)
-# llm_tokenizer, llm_model = load_llm_model()
-llm_tokenizer, llm_model = "meta-llama/Meta-Llama-3-8B-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct"
-pipe = pipeline("text-generation", model=llm_model, tokenizer=llm_tokenizer, max_new_tokens=200)
 # Info Section
 def display_info():
     st.sidebar.title("Information")
@@ -334,65 +318,7 @@ def get_word_type(word):
     doc = nlp(word)
     return doc[0].pos_
-def generate_text_with_llama(prompt):
-    full_prompt = f"""[INST] {prompt} [/INST]"""
-    result = pipe(prompt, temperature=0.7, do_sample=True)[0]['generated_text']
-    # Extract the generated part after the prompt
-    # return result.split('[/INST]')[-1].strip()
-    return result
-async def generate_options_with_llm(answer, context, question, n=4):
-    prompt = f"""Given the following context, question, and correct answer, generate {n-1} incorrect but plausible answer options. The options should be:
-1. Contextually related to the given context
-2. Grammatically consistent with the question
-3. Different from the correct answer
-4. Not explicitly mentioned in the given context
-Context: {context}
-Question: {question}
-Correct Answer: {answer}
-Provide the options in a comma-separated list.
-"""
-    try:
-        response = await asyncio.to_thread(generate_text_with_llama, prompt)
-        options = [option.strip() for option in response.split(',')]
-        options = [option for option in options if option.lower() != answer.lower()]
-        print(f"\n\nLLM Options are: {options}\n\n")
-        return options[:n-1]  # Ensure we only return n-1 options
-    except Exception as e:
-        st.error(f"Error generating options with LLM: {e}")
-        return []
 async def generate_options_async(answer, context, question, n=4):
-    options = [answer]
-    # Generate options using the language model
-    llm_options = await generate_options_with_llm(answer, context, question, n)
-    options.extend(llm_options)
-    # If we don't have enough options, fall back to previous methods
-    if len(options) < n:
-        semantic_options = await generate_semantic_options(answer, context, question, n - len(options))
-        options.extend(semantic_options)
-    # If we still don't have enough options, use the fallback method
-    while len(options) < n:
-        fallback_options = await get_fallback_options(answer, context)
-        for option in fallback_options:
-            if option not in options and ensure_grammatical_consistency(question, answer, option):
-                options.append(option)
-                if len(options) == n:
-                    break
-    # Shuffle the options
-    random.shuffle(options)
-    return options
-async def generate_semantic_options(answer, context, question, n=4):
     try:
         options = [answer]
@@ -409,7 +335,7 @@ async def generate_semantic_options(answer, context, question, n=4):
         for word in context_words:
             if get_word_type(word) == answer_type:
                 similarity = get_semantic_similarity(answer, word)
-                if 0.2 < similarity < 0.8:  # Adjust these thresholds as needed
                     similar_words.append((word, similarity))
         # Sort by similarity (descending) and take top n-1
@@ -519,16 +445,13 @@ async def generate_questions_async(text, num_questions, context_window_size, num
         st.error(f"An unexpected error occurred: {str(e)}")
         return []
-async def process_batch(batch, keywords, context_window_size, num_beams, use_llm_options):
     questions = []
     for text in batch:
         keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
         for keyword, context in keyword_sentence_mapping.items():
             question = await generate_question_async(context, keyword, num_beams)
-            if use_llm_options:
-                options = await generate_options_async(keyword, context, question)
-            else:
-                options =await generate_semantic_options(keyword, context, question)
             overall_score, relevance_score, complexity_score, spelling_correctness = assess_question_quality(context, question, keyword)
             if overall_score >= 0.5:
                 questions.append({
@@ -604,7 +527,6 @@ def assess_question_quality(context, question, answer):
     return overall_score, relevance_score, complexity_score, spelling_correctness
 def main():
-    # load_dotenv()
     # Streamlit interface
     st.title(":blue[Question Generator System]")
     session_id = get_session_id()
@@ -654,7 +576,7 @@ def main():
         start_time = time.time()
         with st.spinner("Generating questions..."):
             try:
-                state['generated_questions'] = asyncio.run(generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords, use_llm_for_options))
                 if not state['generated_questions']:
                     st.warning("No questions were generated. The text might be too short or lack suitable content.")
                 else:

 import streamlit as st
 from transformers import T5ForConditionalGeneration, T5Tokenizer
 import spacy
 import nltk
 from sklearn.feature_extraction.text import TfidfVectorizer
 import time
 import asyncio
 import aiohttp
 print("***************************************************************")
 st.set_page_config(
 st.set_option('deprecation.showPyplotGlobalUse',False)
 class QuestionGenerationError(Exception):
     """Custom exception for question generation errors."""
     pass
 # Load Spacy Model
 @st.cache_resource
 def load_nlp_models():
+    nlp = spacy.load("en_core_web_md")
     s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
     return nlp, s2v
     spell = SpellChecker()
     return similarity_model, spell
 with st.sidebar:
     select_model = st.selectbox("Select Model", ("T5-large","T5-small"))
 if select_model == "T5-large":
 context_model = similarity_model
 sentence_model = similarity_model
 model, tokenizer = load_model(modelname)
 # Info Section
 def display_info():
     st.sidebar.title("Information")
     doc = nlp(word)
     return doc[0].pos_
 async def generate_options_async(answer, context, question, n=4):
     try:
         options = [answer]
         for word in context_words:
             if get_word_type(word) == answer_type:
                 similarity = get_semantic_similarity(answer, word)
+                if 0.3 < similarity < 0.8:  # Adjust these thresholds as needed
                     similar_words.append((word, similarity))
         # Sort by similarity (descending) and take top n-1
         st.error(f"An unexpected error occurred: {str(e)}")
         return []
+async def process_batch(batch, keywords, context_window_size, num_beams):
     questions = []
     for text in batch:
         keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
         for keyword, context in keyword_sentence_mapping.items():
             question = await generate_question_async(context, keyword, num_beams)
+            options = await generate_options_async(keyword, context, question)
             overall_score, relevance_score, complexity_score, spelling_correctness = assess_question_quality(context, question, keyword)
             if overall_score >= 0.5:
                 questions.append({
     return overall_score, relevance_score, complexity_score, spelling_correctness
 def main():
     # Streamlit interface
     st.title(":blue[Question Generator System]")
     session_id = get_session_id()
         start_time = time.time()
         with st.spinner("Generating questions..."):
             try:
+                state['generated_questions'] = asyncio.run(generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords))
                 if not state['generated_questions']:
                     st.warning("No questions were generated. The text might be too short or lack suitable content.")
                 else: