| |
| from transformers import AutoModel, AutoTokenizer |
| import torch |
| import streamlit as st |
| import subprocess |
| import sys |
|
|
# --- Page configuration --------------------------------------------------
# st.set_page_config() must be the FIRST Streamlit command executed in the
# script. In the original file it ran after st.title()/st.write(), which
# raises StreamlitAPIException; it is therefore moved to the top here.
st.set_page_config(
    page_title="Document Chatbot",
    layout="centered",
    initial_sidebar_state="collapsed",
)

# --- Environment / package diagnostics -----------------------------------
st.title("Package Installation Test")

st.write(f"Python version: {sys.version}")

# NOTE(review): installing a package at runtime is a debugging aid only —
# it mutates the live environment on every rerun and should be removed
# before production. The argument-list form of subprocess (shell=False)
# is kept, which avoids shell-injection issues.
try:
    st.write("Attempting to install transformers...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers"])
    st.success("Transformers package installed successfully!")
except Exception as e:
    st.error(f"Error installing transformers: {str(e)}")

# Dump the installed-package list so the deployment environment can be
# inspected from the app itself.
st.write("Installed packages:")
try:
    installed_packages = subprocess.check_output([sys.executable, "-m", "pip", "list"]).decode()
    st.code(installed_packages)
except Exception as e:
    st.error(f"Error listing packages: {str(e)}")
|
|
@st.cache_resource
def load_model():
    """Load and cache the DistilBERT encoder and its tokenizer.

    Decorated with st.cache_resource so the expensive download and
    initialization run once and are shared across Streamlit reruns.

    Returns:
        tuple: (model, tokenizer) for "distilbert-base-uncased".
    """
    checkpoint = "distilbert-base-uncased"
    model = AutoModel.from_pretrained(checkpoint, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer
|
|
def embed_document(document: str, model, tokenizer) -> torch.Tensor:
    """Embed *document* as a single vector.

    The text is tokenized (truncated to 512 tokens) and passed through the
    encoder; the hidden state of the first token of the last layer is used
    as the document representation.

    Returns:
        torch.Tensor of shape (1, hidden_size).
    """
    encoded = tokenizer(
        document,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )
    # Inference only — no gradients needed.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # First-token ([CLS]-style) pooling.
    return hidden_states[:, 0, :]
|
|
def answer_question(question: str, document_embeddings: torch.Tensor, model, tokenizer) -> str:
    """Compare *question* against the document embedding.

    Embeds the question the same way as the document (first-token pooling)
    and computes cosine similarity. Returns a score message when the
    similarity exceeds 0.5, otherwise a fixed fallback message.
    """
    encoded = tokenizer(
        question,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )

    with torch.no_grad():
        question_embeddings = model(**encoded).last_hidden_state[:, 0, :]

    # Single scalar: both embeddings are (1, hidden) tensors.
    score = torch.cosine_similarity(document_embeddings, question_embeddings).item()

    if score > 0.5:
        return f"Similarity score: {score:.2f}"
    return "Sorry, I couldn't find a relevant answer in the document."
|
|
def main():
    """Streamlit entry point: upload a text document, then query it."""
    st.title("Document Chatbot")

    # Model loading is the most likely failure point (it may download
    # weights); abort the whole app run if it fails.
    try:
        model, tokenizer = load_model()
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return

    document_file = st.file_uploader(
        "Upload a text document (txt)",
        type=["txt"],
        help="Please upload a text file to analyze",
    )

    # Guard clause: nothing to do until a file has been uploaded.
    if document_file is None:
        return

    try:
        document = document_file.read().decode("utf-8")
        st.success("Document uploaded successfully!")

        document_embeddings = embed_document(document, model, tokenizer)

        st.subheader("Ask a question")
        question = st.text_input("Enter your question about the document:")

        if question:
            with st.spinner("Finding answer..."):
                answer = answer_question(question, document_embeddings, model, tokenizer)
                st.write(answer)
    except Exception as e:
        st.error(f"Error processing document: {str(e)}")
|
|
| if __name__ == "__main__": |
| main() |