Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| import time | |
| import PyPDF2 | |
| from docx import Document | |
| import pandas as pd | |
| from dotenv import load_dotenv | |
| from unsloth import FastLanguageModel | |
| from transformers import AutoTokenizer | |
| # Load environment variables | |
| load_dotenv() | |
# Avatar images (pinned to specific commits) and the canned biography text.
_AVATAR_REPO_RAW = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis"
USER_AVATAR = (
    f"{_AVATAR_REPO_RAW}/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
)
BOT_AVATAR = (
    f"{_AVATAR_REPO_RAW}/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
)

# Markdown shown verbatim when the user asks who the assistant is.
ATALIBA_BIO = """
**I am Ataliba Miguel's Digital Twin** π€
**Background:**
- π Mechanical Engineering (BSc)
- β½ Oil & Gas Engineering (MSc Specialization)
- π§ 17+ years in Oil & Gas Industry
- π Current: Topside Inspection Methods Engineer @ TotalEnergies
- π€ AI Practitioner Specialist
- π Founder of ValonyLabs (AI solutions for industrial corrosion, retail analytics, and KPI monitoring)
**Capabilities:**
- Technical document analysis
- Engineering insights
- AI-powered problem solving
- Cross-domain knowledge integration
Ask me about engineering challenges, AI applications, or industry best practices!
"""
# UI setup: inject the custom font/padding CSS, then draw the page title.
_PAGE_CSS = """
<style>
@import url('https://fonts.cdnfonts.com/css/tw-cen-mt');
* { font-family: 'Tw Cen MT', sans-serif; }
.st-emotion-cache-1y4p8pa { padding: 2rem 1rem; }
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
st.title("π Ataliba o Agent Nerdx π")

# Sidebar: model information and the document uploader.
_UPLOAD_EXTENSIONS = ["pdf", "docx", "xlsx", "xlsm"]
with st.sidebar:
    st.header("β‘οΈ Hugging Face Model Loaded")
    st.markdown("Model: `amiguel/unsloth_finetune_test` with LoRA")
    uploaded_file = st.file_uploader(
        "Upload technical documents",
        type=_UPLOAD_EXTENSIONS,
    )
# Session state: seed the keys the rest of the script reads on every rerun.
# The dict literal is rebuilt on each run, so every new session gets its own
# empty chat history list.
for _state_key, _initial_value in {"file_context": None, "chat_history": []}.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# File parser
def parse_file(file):
    """Extract plain text from an uploaded PDF, Word, or Excel document.

    The format is chosen from the upload's MIME type, falling back to the
    file-name extension when the MIME type is missing or unrecognised.

    Args:
        file: A file-like upload exposing ``type`` (MIME string) and ``name``
            attributes.

    Returns:
        The extracted text, or ``None`` when the format is unsupported or
        parsing fails. In both cases an error is shown in the UI.
    """
    docx_mime = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )
    excel_mimes = {
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-excel",
        # Macro-enabled workbooks (.xlsm) are accepted by the uploader, so
        # their MIME type must be recognised here too.
        "application/vnd.ms-excel.sheet.macroenabled.12",
    }
    mime = (getattr(file, "type", None) or "").lower()
    extension = os.path.splitext(getattr(file, "name", None) or "")[1].lower()

    try:
        if mime == "application/pdf" or extension == ".pdf":
            reader = PyPDF2.PdfReader(file)
            # Guard against pages that yield no text (e.g. scanned images).
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        if mime == docx_mime or extension == ".docx":
            doc = Document(file)
            return "\n".join(para.text for para in doc.paragraphs)
        if mime in excel_mimes or extension in {".xlsx", ".xlsm"}:
            df = pd.read_excel(file)
            return df.to_string()
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None

    # Tell the user instead of silently returning nothing.
    st.error(f"Unsupported file type: {mime or extension or 'unknown'}")
    return None
# Process file
if uploaded_file:
    # Identify the current upload so that swapping in a different document
    # replaces the previous context instead of being ignored.
    _upload_key = (uploaded_file.name, uploaded_file.size)
    _is_new_upload = st.session_state.get("file_key") != _upload_key
    # A failed parse leaves the context empty; retry it on the next rerun so
    # the error stays visible, as the original flow did.
    if _is_new_upload or not st.session_state.file_context:
        st.session_state.file_context = parse_file(uploaded_file)
        st.session_state.file_key = _upload_key
    if st.session_state.file_context:
        st.sidebar.success("β Document loaded successfully")
else:
    # The upload was removed: stop answering from a document the user can no
    # longer see in the sidebar.
    st.session_state.file_context = None
    st.session_state.file_key = None
# Load model
@st.cache_resource
def load_unsloth_model():
    """Load the 4-bit base model, attach the LoRA adapter, and return both.

    Decorated with ``st.cache_resource`` so the model is loaded once and then
    reused across reruns and chat messages, instead of being re-created on
    every call.

    Returns:
        tuple: ``(model, tokenizer)``, with the model prepared for inference
        via ``FastLanguageModel.for_inference``.
    """
    base_model = "unsloth/llama-3-8b-Instruct-bnb-4bit"
    adapter = "amiguel/unsloth_finetune_test"
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=base_model,
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True,
    )
    model.load_adapter(adapter)
    FastLanguageModel.for_inference(model)
    return model, tokenizer
# Generate response
def generate_response(prompt):
    """Yield the assistant's reply to ``prompt`` one line at a time.

    Prompts containing any bio trigger phrase are answered with the canned
    ``ATALIBA_BIO`` text. Everything else is sent to the model together with
    the uploaded document text (if any) as context.

    Args:
        prompt: The user's message.

    Yields:
        str: Successive newline-terminated lines of the reply, or a single
        error message if loading or running the model raises.
    """
    bio_triggers = ['who are you', 'ataliba', 'yourself', 'skilled at',
                    'background', 'experience', 'valonylabs', 'totalenergies']
    lowered_prompt = prompt.lower()
    if any(trigger in lowered_prompt for trigger in bio_triggers):
        for line in ATALIBA_BIO.split('\n'):
            yield line + '\n'
            time.sleep(0.1)
        return
    try:
        model, tokenizer = load_unsloth_model()
        context = st.session_state.file_context or ""
        full_prompt = f"You are an expert in life balance and general knowledge. Use the context to answer precisely.\nContext: {context}\n\nQuestion: {prompt}"
        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
        # The generated sequence starts with the prompt tokens. Decode only
        # what follows them, so the instruction and the document context are
        # not echoed back to the user.
        prompt_token_count = inputs["input_ids"].shape[-1]
        new_tokens = outputs[0][prompt_token_count:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        for line in response.split('\n'):
            yield line + '\n'
            time.sleep(0.05)
    except Exception as e:
        yield f"β οΈ Model Error: {str(e)}"
# Chat interface: replay the stored conversation, then handle a new prompt.
for past_message in st.session_state.chat_history:
    speaker = past_message["role"]
    speaker_avatar = BOT_AVATAR if speaker != "user" else USER_AVATAR
    with st.chat_message(speaker, avatar=speaker_avatar):
        st.markdown(past_message["content"])

prompt = st.chat_input("Ask about documents or technical matters...")
if prompt:
    # Record and echo the user's message.
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)

    # Stream the reply into one placeholder, showing a cursor glyph until
    # the generator is exhausted.
    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_placeholder = st.empty()
        received_chunks = []
        for chunk in generate_response(prompt):
            received_chunks.append(chunk)
            response_placeholder.markdown("".join(received_chunks) + "β")
        full_response = "".join(received_chunks)
        response_placeholder.markdown(full_response)

    st.session_state.chat_history.append(
        {"role": "assistant", "content": full_response}
    )