# NLP_PROJECT_NEW / app.py
# Source: Preethamreddy799's Hugging Face Space, commit ca84000
# File-level setup: imports (stdlib, third-party) and one-time NLTK data downloads.
import os
import re
import shutil

import joblib
import nltk
import streamlit as st
from git import Repo
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Download the NLTK resources needed by preprocess_text (tokenizer models,
# stopword list, WordNet data for lemmatization). Downloads are skipped when
# already present; quiet=True keeps the app console clean.
for _resource in ("punkt", "stopwords", "wordnet", "omw-1.4"):
    nltk.download(_resource, quiet=True)
# Function to load models using GitPython
def load_model_with_git(repo_url, file_path):
    """Clone `repo_url` into a local cache directory (once) and load
    `file_path` from the checkout with joblib.

    Raises:
        FileNotFoundError: if `file_path` is not present in the cloned repo.
    """
    clone_dir = "temp_repo"  # local cache for the cloned repository
    # Clone only on the first call; later calls reuse the existing checkout.
    if not os.path.exists(clone_dir):
        Repo.clone_from(repo_url, clone_dir)
    artifact_path = os.path.join(clone_dir, file_path)
    # Guard clause: fail loudly when the requested artifact is missing.
    if not os.path.exists(artifact_path):
        raise FileNotFoundError(f"The file {file_path} does not exist in the repository.")
    return joblib.load(artifact_path)
# Repository URL and model artifact file paths.
repo_url = "https://github.com/vgollapalli0816/NLP-Testcases.git"
models_to_load = {
    "knn_model_expected": "knn_model_expected.joblib",
    "knn_model_steps": "knn_model_steps.joblib",
    "tfidf_vectorizer": "tfidf_vectorizer.pkl",
}

# Load each model. The try/except is per-model so one failed artifact no
# longer aborts the remaining loads (the original single try stopped at the
# first error, leaving later models unloaded).
loaded_models = {}
for model_name, file_path in models_to_load.items():
    try:
        loaded_models[model_name] = load_model_with_git(repo_url, file_path)
        print(f"{model_name} loaded successfully.")
    except Exception as e:  # best-effort load: report and continue
        print(f"An error occurred: {e}")

# Optional cleanup: remove the cloned repo after use (models are already
# deserialized in memory at this point).
import shutil

if os.path.exists("temp_repo"):
    shutil.rmtree("temp_repo")
lemmatizer = WordNetLemmatizer()
# Hoisted to module level so the English stopword set is built once rather
# than on every preprocess_text call.
STOP_WORDS = set(stopwords.words('english'))


# Function to preprocess input text
def preprocess_text(text):
    """Normalize raw acceptance-criteria text for TF-IDF vectorization.

    Pipeline: strip punctuation and digits, tokenize, lowercase, drop
    English stopwords, lemmatize, and rejoin into one cleaned string.

    Args:
        text: raw acceptance-criteria string.

    Returns:
        A single space-joined string of cleaned, lemmatized tokens.
    """
    # 1. Remove special characters, digits, and surrounding whitespace.
    text = re.sub(r'[^\w\s]', '', text)  # remove special characters
    text = re.sub(r'\d+', '', text)      # remove numbers
    text = text.strip()
    # 2. Tokenize the text.
    tokens = word_tokenize(text)
    # 3. Lowercase all tokens.
    tokens = [token.lower() for token in tokens]
    # 4. Remove stopwords (precomputed set gives O(1) membership checks).
    tokens = [token for token in tokens if token not in STOP_WORDS]
    # 5. Lemmatize each token (default noun POS).
    tokens_lemmatized = [lemmatizer.lemmatize(token) for token in tokens]
    # Rejoin tokens into the cleaned text string.
    return ' '.join(tokens_lemmatized)
# Function to generate test steps using the trained KNN model
def generate_output(acceptance_criteria, model, vectorizer):
    """Predict the nearest stored test-steps text for the given acceptance
    criteria and format it as a numbered list, one step per line."""
    # Clean the raw criteria and embed it with the fitted TF-IDF vectorizer.
    features = vectorizer.transform([preprocess_text(acceptance_criteria)])
    # KNN returns the stored test-steps string closest to the embedding.
    predicted = model.predict(features)
    steps_text = predicted[0]
    # Keywords that typically open a new test action.
    action_keywords = ['user', 'tap', 'verify', 'click', 'navigate']
    # Insert a '.' delimiter ahead of every action keyword so the single
    # predicted string can be split into individual steps. Substitutions are
    # applied sequentially, one keyword at a time, case-insensitively.
    for keyword in action_keywords:
        steps_text = re.sub(r'(\s?)' + keyword, r'.\1' + keyword, steps_text, flags=re.IGNORECASE)
    # Split on the delimiter and discard empty fragments.
    steps = [fragment.strip() for fragment in steps_text.split('.') if fragment.strip()]
    # Number each step for display.
    return "\n".join(f"{index + 1}. {step}" for index, step in enumerate(steps))
def generate_output_result(acceptance_criteria, model, vectorizer):
    """Return the raw KNN prediction (expected results) for the criteria text."""
    cleaned = preprocess_text(acceptance_criteria)
    # Embed with the same fitted TF-IDF vectorizer used during training.
    features = vectorizer.transform([cleaned])
    # Nearest-neighbour lookup of the stored expected-results text.
    return model.predict(features)
# Streamlit UI
st.title("Test Case Steps Generator")
st.write("This app generates test steps based on Test Case Acceptance Criteria.")

# User input
acceptance_criteria = st.text_area("Enter Test Case Acceptance Criteria")

# Models may be None if the load above failed — guarded before use below.
knn_model_results = loaded_models.get("knn_model_expected")
knn_model_steps = loaded_models.get("knn_model_steps")
tfidf_vectorizer = loaded_models.get("tfidf_vectorizer")

# Generate test steps and expected results when the user clicks the button.
if st.button("Generate Test Steps and Results"):
    if not acceptance_criteria:
        st.warning("Please enter the Acceptance Criteria text first.")
    elif None in (knn_model_results, knn_model_steps, tfidf_vectorizer):
        # Guard: without this, a failed model load makes .predict()/.transform()
        # raise AttributeError on None when the button is pressed.
        st.error("Models failed to load; cannot generate test steps.")
    else:
        test_steps = generate_output(acceptance_criteria, knn_model_steps, tfidf_vectorizer)
        expected_results = generate_output_result(acceptance_criteria, knn_model_results, tfidf_vectorizer)
        st.subheader("Generated Test Steps")
        st.write(test_steps)
        st.subheader("Generated Expected Results")
        # model.predict returns an array of strings; join them for display.
        st.markdown(" ".join(expected_results))