| |
| from transformers import AutoModel, AutoTokenizer |
| import torch |
| import streamlit as st |
| import subprocess |
| import sys |
|
|
# --- Page configuration --------------------------------------------------
# st.set_page_config() must be the FIRST Streamlit command executed in the
# script. In the original file it ran after st.title()/st.write(), which
# raises StreamlitAPIException; it is therefore moved to the top here.
st.set_page_config(
    page_title="Document Chatbot",
    layout="centered",
    initial_sidebar_state="collapsed",
)

# --- Environment / package diagnostics -----------------------------------
st.title("Package Installation Test")

st.write(f"Python version: {sys.version}")

# NOTE(review): installing a package at runtime is a debugging aid only —
# it mutates the live environment on every rerun and should be removed
# before production. The argument-list form of subprocess (shell=False)
# is kept, which avoids shell-injection issues.
try:
    st.write("Attempting to install transformers...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    st.success("Transformers package installed successfully!")
except Exception as e:
    st.error(f"Error installing transformers: {str(e)}")

# Dump the installed-package list so the deployment environment can be
# inspected from the app itself.
st.write("Installed packages:")
try:
    installed_packages = subprocess.check_output([sys.executable, "-m", "pip", "list"]).decode()
    st.code(installed_packages)
except Exception as e:
    st.error(f"Error listing packages: {str(e)}")
|
|
@st.cache_resource
def load_model():
    """Load and cache the DistilBERT encoder and its tokenizer.

    Decorated with st.cache_resource so the expensive download and
    initialization run once and are shared across Streamlit reruns.

    Returns:
        tuple: (model, tokenizer) for "distilbert-base-uncased".
    """
    checkpoint = "distilbert-base-uncased"
    model = AutoModel.from_pretrained(checkpoint, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer
|
|
def embed_document(document: str, model, tokenizer) -> torch.Tensor:
    """Embed *document* as a single vector.

    The text is tokenized (truncated to 512 tokens) and passed through the
    encoder; the hidden state of the first token of the last layer is used
    as the document representation.

    Returns:
        torch.Tensor of shape (1, hidden_size).
    """
    encoded = tokenizer(
        document,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )
    # Inference only — no gradients needed.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # First-token ([CLS]-style) pooling.
    return hidden_states[:, 0, :]
|
|
def answer_question(question: str, document_embeddings: torch.Tensor, model, tokenizer) -> str:
    """Compare *question* against the document embedding.

    Embeds the question the same way as the document (first-token pooling)
    and computes cosine similarity. Returns a score message when the
    similarity exceeds 0.5, otherwise a fixed fallback message.
    """
    encoded = tokenizer(
        question,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )

    with torch.no_grad():
        question_embeddings = model(**encoded).last_hidden_state[:, 0, :]

    # Single scalar: both embeddings are (1, hidden) tensors.
    score = torch.cosine_similarity(document_embeddings, question_embeddings).item()

    if score > 0.5:
        return f"Similarity score: {score:.2f}"
    return "Sorry, I couldn't find a relevant answer in the document."
|
|
def main():
    """Streamlit entry point: upload a text document, then query it."""
    st.title("Document Chatbot")

    # Model loading is the most likely failure point (it may download
    # weights); abort the whole app run if it fails.
    try:
        model, tokenizer = load_model()
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return

    document_file = st.file_uploader(
        "Upload a text document (txt)",
        type=["txt"],
        help="Please upload a text file to analyze",
    )

    # Guard clause: nothing to do until a file has been uploaded.
    if document_file is None:
        return

    try:
        document = document_file.read().decode("utf-8")
        st.success("Document uploaded successfully!")

        document_embeddings = embed_document(document, model, tokenizer)

        st.subheader("Ask a question")
        question = st.text_input("Enter your question about the document:")

        if question:
            with st.spinner("Finding answer..."):
                answer = answer_question(question, document_embeddings, model, tokenizer)
                st.write(answer)
    except Exception as e:
        st.error(f"Error processing document: {str(e)}")
|
|
| if __name__ == "__main__": |
| main() |