Nawal20's picture
Update app.py
73f4b75 verified
import io
import os
import re

import requests
import streamlit as st
from pypdf import PdfReader
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
# Digi-Key API endpoints and credentials
TOKEN_URL = "https://api.digikey.com/v1/oauth2/token"
API_URL = "https://api.digikey.com/products/v4/search/keyword"

# SECURITY: credentials should not live in source control. The environment
# variables DIGIKEY_CLIENT_ID / DIGIKEY_CLIENT_SECRET take precedence; the
# literals are kept only as a fallback so existing deployments keep working.
# They should be rotated and removed once the environment is configured.
CLIENT_ID = os.environ.get("DIGIKEY_CLIENT_ID") or "K9d4a2AaGwQcoAvdNDZVYEOB3sqL4bMg"
CLIENT_SECRET = os.environ.get("DIGIKEY_CLIENT_SECRET") or "NxzuxY67eJssGDkA"


@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it.

    Streamlit re-executes this script on every interaction; without caching
    the model would be re-instantiated on each rerun.
    """
    return pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


# NLP model for summarization (cached across reruns)
summarizer = _load_summarizer()
# Function to fetch access token
@st.cache_data(ttl=3500)
def fetch_access_token() -> str:
    """Request an OAuth2 client-credentials token from Digi-Key.

    The result is cached for 3500 seconds (presumably just under the token
    lifetime -- confirm against the `expires_in` value Digi-Key returns).

    Returns:
        The `access_token` string from the token endpoint's JSON response.

    On any failure (network error, non-200 status, malformed response) an
    error is shown in the UI and the script run is halted via `st.stop()`,
    so nothing is returned or cached in that case.
    """
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    }
    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.post(TOKEN_URL, headers=headers, data=data, timeout=15)
    except requests.RequestException as exc:
        st.error(f"Failed to retrieve access token: {exc}")
        st.stop()  # raises, so execution never continues past this point

    if response.status_code != 200:
        st.error(f"Failed to retrieve access token: {response.status_code} - {response.text}")
        st.stop()

    try:
        return response.json()["access_token"]
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or did not contain the expected key.
        st.error(f"Failed to retrieve access token: unexpected response - {response.text}")
        st.stop()
# Function to make API requests to Digi-Key
def search_digikey_components(keywords: str, limit: int = 50, offset: int = 0) -> dict:
    """Run a keyword search against the Digi-Key product search endpoint.

    Args:
        keywords: Search text sent as the `Keywords` field.
        limit: Value sent as `Limit` (defaults to 50, the original page size).
        offset: Value sent as `Offset` (defaults to 0).

    Returns:
        The decoded JSON response as a dict, or an empty dict when the
        request fails, returns a non-200 status, or the body is not a JSON
        object. Failures are also reported in the UI via `st.error`.
    """
    access_token = fetch_access_token()
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
        "X-DIGIKEY-Client-Id": CLIENT_ID,
    }
    payload = {
        "Keywords": keywords,
        "Limit": limit,
        "Offset": offset,
        "FilterOptionsRequest": {
            "MarketPlaceFilter": "NoFilter",
        },
        "SortOptions": {
            "Field": "None",
            "SortOrder": "Ascending",
        },
    }
    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
    except requests.RequestException as exc:
        st.error(f"API request failed: {exc}")
        return {}

    if response.status_code != 200:
        st.error(f"API request failed: {response.status_code} - {response.text}")
        return {}

    try:
        result = response.json()
    except ValueError:
        st.error("API request failed: response was not valid JSON")
        return {}
    # Callers test for keys with `in`, so always hand back a dict.
    return result if isinstance(result, dict) else {}
# Function to extract text from a PDF datasheet
def extract_text_from_pdf(pdf_url: str) -> str:
    """Download a PDF and return the concatenated text of all its pages.

    The PDF is parsed in memory with pypdf's `PdfReader` (already imported by
    this file) instead of being written to a fixed `datasheet.pdf` path, so
    concurrent sessions cannot overwrite each other's download.

    Args:
        pdf_url: URL of the datasheet.

    Returns:
        The extracted text. On failure a human-readable message is returned
        instead of raising: "Unable to fetch datasheet." for a non-200
        response, or "Error extracting text: ..." for any other error.
    """
    try:
        # NOTE(review): some APIs return protocol-relative URLs ("//host/...");
        # requests rejects those, so default them to HTTPS.
        if pdf_url.startswith("//"):
            pdf_url = "https:" + pdf_url

        response = requests.get(pdf_url, timeout=30)
        if response.status_code != 200:
            return "Unable to fetch datasheet."

        reader = PdfReader(io.BytesIO(response.content))
        # extract_text() can yield nothing for image-only pages; treat as "".
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # Deliberately broad: this is a best-effort helper and callers expect
        # a string back regardless of download or PDF parsing problems.
        return f"Error extracting text: {str(e)}"
# Helper function to clean text
def clean_text(text):
    """Return *text* reduced to ASCII with whitespace normalised.

    Characters outside ASCII are dropped, every run of whitespace becomes a
    single space, and leading/trailing whitespace is removed.
    """
    ascii_only = text.encode("ascii", errors="ignore").decode("ascii")
    # split() with no argument splits on whitespace runs and discards empty
    # leading/trailing pieces, so joining with one space collapses and trims.
    return " ".join(ascii_only.split())
# Function to summarize the datasheet
def summarize_datasheet(text: str) -> str:
    """Summarize the beginning of a datasheet's text.

    The text is cleaned first and then truncated to 1024 characters, so the
    budget is spent on usable content rather than on whitespace or non-ASCII
    characters that cleaning would remove anyway.

    Args:
        text: Raw text extracted from a datasheet.

    Returns:
        The model's summary, "No valid text extracted for summarization."
        when nothing usable remains after cleaning, or an
        "Error during summarization: ..." message if the model call fails.
    """
    cleaned_text = clean_text(text or "")[:1024].rstrip()  # clean, then truncate
    if not cleaned_text:
        return "No valid text extracted for summarization."
    try:
        # Summarize the text using the summarization model
        result = summarizer(cleaned_text, max_length=300, min_length=50, do_sample=False)
        return result[0]["summary_text"]
    except Exception as e:
        # Best-effort: report the failure as text instead of raising.
        return f"Error during summarization: {e}"
# Load T5 model and tokenizer
@st.cache_resource
def load_t5_model():
    """Load the "t5-small" tokenizer and model, cached by Streamlit.

    Returns:
        A `(tokenizer, model)` tuple.
    """
    checkpoint = "t5-small"
    t5_tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    t5_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    return t5_tokenizer, t5_model
# Generate advice for a component using extracted datasheet data
def generate_advice(product: dict, tokenizer, model) -> str:
    """Generate free-text advice for one product.

    The product's datasheet (if it has a `DatasheetUrl`) is downloaded and
    summarized, then combined with the product's description, category and
    manufacturer into a prompt for the supplied model.

    Args:
        product: One product dict from the search response.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Model whose `generate` method produces the advice.

    Returns:
        The decoded model output.
    """
    # Fetch and summarize the datasheet when a URL is available
    datasheet_url = product.get("DatasheetUrl")
    datasheet_text = extract_text_from_pdf(datasheet_url) if datasheet_url else ""
    summarized_datasheet = summarize_datasheet(datasheet_text)

    # `or {}` / `or default` also covers keys that are present but null,
    # which a plain .get(key, default) would pass through as None.
    description = (product.get("Description") or {}).get("DetailedDescription") or "No description available."
    category = (product.get("Category") or {}).get("Name") or "Unknown Category"
    manufacturer = (product.get("Manufacturer") or {}).get("Name") or "Unknown Manufacturer"

    # Generate advice using the summarized datasheet and description
    prompt = (
        f"Based on the datasheet summary: {summarized_datasheet}. "
        f"Description: {description}. "
        f"Category: {category}. "
        f"Manufacturer: {manufacturer}. "
        "Provide usage advice, specifications, and recommended components that may work well with it."
    )
    inputs = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
    # Beam search without sampling: temperature/top_k only apply when
    # do_sample=True, so the previously passed values had no effect.
    outputs = model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Streamlit app interface
st.title("Component Selection Advisor")
st.write(
    """
This app helps circuit designers search for electronic components using the Digi-Key API
and provides advice using a pretrained T5 model. It can also extract information from datasheets.
"""
)

# Input for the user
keywords = st.text_input("Enter the name or keyword of the component:", "")
if keywords:
    st.write(f"Searching for components matching: {keywords}...")
    data = search_digikey_components(keywords)
    products = data.get("Products") or []
    if products:
        st.header("Search Results")
        tokenizer, model = load_t5_model()
        for product in products:
            # Any of these fields may be missing or null in a result, so read
            # them defensively instead of indexing directly.
            description = product.get("Description") or {}
            manufacturer = product.get("Manufacturer") or {}
            unit_price = product.get("UnitPrice")
            datasheet_url = product.get("DatasheetUrl")
            product_url = product.get("ProductUrl")

            st.subheader(description.get("ProductDescription") or "Unnamed product")
            st.write(f"**Manufacturer**: {manufacturer.get('Name') or 'Unknown'}")
            st.write(f"**Product Number**: {product.get('ManufacturerProductNumber') or 'N/A'}")
            if unit_price is not None:
                st.write(f"**Unit Price**: ${unit_price}")
            else:
                st.write("**Unit Price**: N/A")
            # Only render links that actually point somewhere
            if datasheet_url:
                st.write(f"[Datasheet]({datasheet_url})")
            if product_url:
                st.write(f"[Product Link]({product_url})")

            # Check if PhotoUrl exists and is not None
            if product.get("PhotoUrl"):
                st.image(product["PhotoUrl"], width=200)
            else:
                st.write("_Image not available_")

            # Generate advice for the product
            advice = generate_advice(product, tokenizer, model)
            st.write(f"**AI Advice:** {advice}")
            st.write("---")
    else:
        st.warning("No components found. Try using different keywords.")