import io
import re

import requests
import streamlit as st
from pypdf import PdfReader
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline

# Digi-Key OAuth2 / search API endpoints.
TOKEN_URL = "https://api.digikey.com/v1/oauth2/token"
API_URL = "https://api.digikey.com/products/v4/search/keyword"
# SECURITY NOTE(review): credentials are hard-coded in source. Move them to
# st.secrets or environment variables before committing/deploying.
CLIENT_ID = "K9d4a2AaGwQcoAvdNDZVYEOB3sqL4bMg"  # Replace with your Digi-Key Client ID
CLIENT_SECRET = "NxzuxY67eJssGDkA"  # Replace with your Digi-Key Client Secret

# Pretrained summarization pipeline used to condense datasheet text.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


# ttl=3500s keeps the cached token just under the typical 3600s OAuth2 lifetime.
@st.cache_data(ttl=3500)
def fetch_access_token() -> str:
    """Fetch an OAuth2 client-credentials access token from Digi-Key.

    Returns the bearer token string. On failure, shows a Streamlit error
    and halts the script run via st.stop().
    """
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    }
    # timeout prevents the app from hanging forever on a stalled request.
    response = requests.post(TOKEN_URL, headers=headers, data=data, timeout=30)
    if response.status_code == 200:
        token_data = response.json()
        return token_data["access_token"]
    else:
        st.error(f"Failed to retrieve access token: {response.status_code} - {response.text}")
        st.stop()


def search_digikey_components(keywords: str) -> dict:
    """Run a Digi-Key keyword search and return the parsed JSON response.

    Returns an empty dict (and surfaces a Streamlit error) when the API
    call fails, so callers can safely test for "Products" in the result.
    """
    access_token = fetch_access_token()
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
        "X-DIGIKEY-Client-Id": CLIENT_ID,
    }
    payload = {
        "Keywords": keywords,
        "Limit": 50,
        "Offset": 0,
        "FilterOptionsRequest": {
            "MarketPlaceFilter": "NoFilter",
        },
        "SortOptions": {
            "Field": "None",
            "SortOrder": "Ascending",
        },
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
    if response.status_code == 200:
        return response.json()
    else:
        st.error(f"API request failed: {response.status_code} - {response.text}")
        return {}


def extract_text_from_pdf(pdf_url: str) -> str:
    """Download a PDF datasheet and return the concatenated text of all pages.

    Bug fix: the original called `pdfplumber.open(...)` although only pypdf's
    PdfReader was imported, raising NameError on every call. The PDF is now
    parsed with PdfReader directly from memory, so no temp file is written.
    Returns a human-readable error string on any failure.
    """
    try:
        response = requests.get(pdf_url, timeout=30)
        if response.status_code != 200:
            return "Unable to fetch datasheet."
        reader = PdfReader(io.BytesIO(response.content))
        # extract_text() may return None for image-only pages; coerce to "".
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        return f"Error extracting text: {str(e)}"


def clean_text(text: str) -> str:
    """Clean extracted text by removing non-ASCII characters and extra whitespace."""
    text = text.encode("ascii", "ignore").decode()
    text = re.sub(r"\s+", " ", text)
    return text.strip()


def summarize_datasheet(text: str) -> str:
    """Summarize (up to the first 1024 chars of) datasheet text with DistilBART.

    Returns the summary string, or an explanatory message when no usable
    text was extracted or the model raises.
    """
    # Truncate before cleaning: the summarizer's input window is limited.
    cleaned_text = clean_text(text[:1024])
    if len(cleaned_text) == 0:
        return "No valid text extracted for summarization."
    try:
        summary = summarizer(
            cleaned_text, max_length=300, min_length=50, do_sample=False
        )[0]["summary_text"]
        return summary
    except Exception as e:
        return f"Error during summarization: {e}"


# cache_resource: the model/tokenizer are loaded once per server process.
@st.cache_resource
def load_t5_model():
    """Load and cache the t5-small tokenizer and model pair."""
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5ForConditionalGeneration.from_pretrained("t5-small")
    return tokenizer, model


def generate_advice(product: dict, tokenizer, model) -> str:
    """Generate usage advice for a product from its datasheet and metadata.

    Downloads and summarizes the datasheet (when a URL is present), builds a
    prompt from the summary plus product description/category/manufacturer,
    and decodes a T5 beam-search completion.
    """
    datasheet_url = product.get("DatasheetUrl")
    datasheet_text = ""
    if datasheet_url:
        datasheet_text = extract_text_from_pdf(datasheet_url)
    summarized_datasheet = summarize_datasheet(datasheet_text)
    # Fall back to defaults when the API omits nested fields.
    description = product.get("Description", {}).get(
        "DetailedDescription", "No description available."
    )
    category = product.get("Category", {}).get("Name", "Unknown Category")
    manufacturer = product.get("Manufacturer", {}).get("Name", "Unknown Manufacturer")
    prompt = (
        f"Based on the datasheet summary: {summarized_datasheet}. "
        f"Description: {description}. Category: {category}. "
        f"Manufacturer: {manufacturer}. Provide usage advice, specifications, "
        f"and recommended components that may work well with it."
    )
    inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
    # NOTE(review): temperature/top_k have no effect without do_sample=True;
    # kept for compatibility — beam search output is deterministic here.
    outputs = model.generate(
        inputs,
        max_length=150,
        num_beams=4,
        temperature=1.5,
        top_k=50,
        early_stopping=True,
    )
    advice = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return advice


# ---------------------------------------------------------------------------
# Streamlit app interface
# ---------------------------------------------------------------------------
st.title("Component Selection Advisor")
st.write(
    """
    This app helps circuit designers search for electronic components using the Digi-Key API
    and provides advice using a pretrained T5 model. It can also extract information from datasheets.
    """
)

keywords = st.text_input("Enter the name or keyword of the component:", "")

if keywords:
    st.write(f"Searching for components matching: {keywords}...")
    data = search_digikey_components(keywords)
    if "Products" in data and data["Products"]:
        st.header("Search Results")
        tokenizer, model = load_t5_model()
        for product in data["Products"]:
            # Defensive .get access: a missing field should not crash the page.
            st.subheader(
                product.get("Description", {}).get("ProductDescription", "Unknown product")
            )
            st.write(f"**Manufacturer**: {product.get('Manufacturer', {}).get('Name', 'Unknown')}")
            st.write(f"**Product Number**: {product.get('ManufacturerProductNumber', 'N/A')}")
            st.write(f"**Unit Price**: ${product.get('UnitPrice', 'N/A')}")
            st.write(f"[Datasheet]({product.get('DatasheetUrl', '')})")
            st.write(f"[Product Link]({product.get('ProductUrl', '')})")
            # PhotoUrl may be absent or None; only render when present.
            if product.get("PhotoUrl"):
                st.image(product["PhotoUrl"], width=200)
            else:
                st.write("_Image not available_")
            advice = generate_advice(product, tokenizer, model)
            st.write(f"**AI Advice:** {advice}")
            st.write("---")
    else:
        st.warning("No components found. Try using different keywords.")