File size: 6,652 Bytes
a6c80ce
3f9e9a0
a1a29f3
711f6fa
187e29a
3f9e9a0
bd03498
 
3f9e9a0
5c4effa
 
b558039
60c603f
768c84d
 
 
bd03498
60c603f
bd03498
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b558039
09ab229
60c603f
b558039
bd03498
3f9e9a0
bd03498
3f9e9a0
 
 
a1a29f3
3f9e9a0
 
 
 
 
 
 
 
 
 
 
 
 
b558039
3f9e9a0
 
768c84d
 
60c603f
 
 
 
 
 
768c84d
 
 
a1a29f3
768c84d
60c603f
 
 
 
 
768c84d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187e29a
 
 
 
 
 
 
768c84d
bd03498
60c603f
 
 
 
 
768c84d
 
 
 
60c603f
768c84d
bd03498
7743b31
 
 
768c84d
 
60c603f
bd03498
768c84d
bd03498
7743b31
bd03498
 
3f9e9a0
73f4b75
3f9e9a0
 
bd03498
187e29a
3f9e9a0
 
 
 
 
 
 
187e29a
09ab229
3f9e9a0
09ab229
 
187e29a
3f9e9a0
8ba33e0
09ab229
 
 
 
 
 
0431ea8
 
 
 
 
 
bd03498
768c84d
bd03498
 
 
09ab229
 
187e29a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import re

import requests
import streamlit as st
from pypdf import PdfReader
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline

# Digi-Key API endpoints (OAuth2 token + v4 keyword search).
TOKEN_URL = "https://api.digikey.com/v1/oauth2/token"
API_URL = "https://api.digikey.com/products/v4/search/keyword"
# SECURITY: credentials must not live in source control. They are now read
# from the environment; the original literals remain only as a
# backward-compatible fallback and should be rotated and removed.
CLIENT_ID = os.environ.get("DIGIKEY_CLIENT_ID", "K9d4a2AaGwQcoAvdNDZVYEOB3sqL4bMg")
CLIENT_SECRET = os.environ.get("DIGIKEY_CLIENT_SECRET", "NxzuxY67eJssGDkA")


# NLP model for summarization (distilBART; downloaded/cached on first run).
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

@st.cache_data(ttl=3500)
def fetch_access_token() -> str:
    """Request an OAuth2 client-credentials token from Digi-Key.

    Cached for ~58 minutes (ttl=3500s) so repeated searches reuse one token.
    On failure, shows a Streamlit error and halts the script run.
    """
    form = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
    }
    response = requests.post(
        TOKEN_URL,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        data=form,
    )
    # Guard clause: anything other than 200 ends this script run.
    if response.status_code != 200:
        st.error(f"Failed to retrieve access token: {response.status_code} - {response.text}")
        st.stop()
    return response.json()["access_token"]

# Keyword search against the Digi-Key v4 product API.
def search_digikey_components(keywords: str) -> dict:
    """Search Digi-Key for products matching *keywords*.

    Returns the parsed JSON response on success, or an empty dict after
    surfacing the error in the Streamlit UI.
    """
    token = fetch_access_token()
    request_body = {
        "Keywords": keywords,
        "Limit": 50,
        "Offset": 0,
        "FilterOptionsRequest": {
            "MarketPlaceFilter": "NoFilter",
        },
        "SortOptions": {
            "Field": "None",
            "SortOrder": "Ascending",
        },
    }
    response = requests.post(
        API_URL,
        headers={
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
            "X-DIGIKEY-Client-Id": CLIENT_ID,
        },
        json=request_body,
    )
    if response.status_code != 200:
        st.error(f"API request failed: {response.status_code} - {response.text}")
        return {}
    return response.json()

# Function to extract text from a PDF datasheet
def extract_text_from_pdf(pdf_url: str) -> str:
    """Download the PDF at *pdf_url* and return its concatenated page text.

    Returns a human-readable message string (not an exception) on any
    download or parse failure, matching how callers consume the result.
    """
    try:
        # timeout prevents the Streamlit run from hanging on a dead host.
        response = requests.get(pdf_url, timeout=30)
        if response.status_code != 200:
            return "Unable to fetch datasheet."

        with open("datasheet.pdf", "wb") as file:
            file.write(response.content)

        # BUG FIX: the original called pdfplumber, which is never imported
        # (the file imports pypdf's PdfReader) — a guaranteed NameError.
        reader = PdfReader("datasheet.pdf")
        # extract_text() may return None for image-only pages; treat as "".
        return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        return f"Error extracting text: {str(e)}"

# Normalization helper shared by the summarization path.
def clean_text(text):
    """Clean extracted text by removing non-ASCII characters and extra whitespace."""
    ascii_only = text.encode("ascii", "ignore").decode()
    collapsed = re.sub(r"\s+", " ", ascii_only)
    return collapsed.strip()

# Condense a (possibly huge) datasheet into a short summary for the prompt.
def summarize_datasheet(text: str) -> str:
    """Summarize the first ~1 KB of *text* with the distilBART pipeline.

    Returns a message string rather than raising if the input is empty or
    the model errors out.
    """
    snippet = clean_text(text[:1024])  # model input is truncated + cleaned
    if not snippet:
        return "No valid text extracted for summarization."

    try:
        result = summarizer(snippet, max_length=300, min_length=50, do_sample=False)
        return result[0]["summary_text"]
    except Exception as e:
        return f"Error during summarization: {e}"

# T5-small is loaded once per server process thanks to st.cache_resource.
@st.cache_resource
def load_t5_model():
    """Return a cached (tokenizer, model) pair for t5-small."""
    return (
        T5Tokenizer.from_pretrained("t5-small"),
        T5ForConditionalGeneration.from_pretrained("t5-small"),
    )

# Build a prompt from the product record + datasheet summary and run T5.
def generate_advice(product: dict, tokenizer, model) -> str:
    """Generate usage advice for a Digi-Key *product* dict via T5.

    Pulls the datasheet (when a URL is present), summarizes it, and folds
    the summary plus catalog metadata into a text-generation prompt.
    """
    url = product.get("DatasheetUrl")
    datasheet_text = extract_text_from_pdf(url) if url else ""

    summarized_datasheet = summarize_datasheet(datasheet_text)

    # Catalog fields fall back to placeholder strings when absent.
    description = product.get("Description", {}).get("DetailedDescription", "No description available.")
    category = product.get("Category", {}).get("Name", "Unknown Category")
    manufacturer = product.get("Manufacturer", {}).get("Name", "Unknown Manufacturer")

    prompt = f"Based on the datasheet summary: {summarized_datasheet}. Description: {description}. Category: {category}. Manufacturer: {manufacturer}. Provide usage advice, specifications, and recommended components that may work well with it."

    encoded = tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
    generated = model.generate(encoded, max_length=150, num_beams=4, temperature=1.5, top_k=50, early_stopping=True)
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Streamlit app interface
# Top-level script body: Streamlit re-runs this on every user interaction.
st.title("Component Selection Advisor")
st.write(
    """
    This app helps circuit designers search for electronic components using the Digi-Key API 
    and provides advice using a pretrained T5 model. It can also extract information from datasheets.
    """
)

# Input for the user
keywords = st.text_input("Enter the name or keyword of the component:", "")

# Empty input → falsy, so nothing runs until the user types a query.
if keywords:
    st.write(f"Searching for components matching: {keywords}...")
    data = search_digikey_components(keywords)
    
    # search_digikey_components returns {} on API failure, so both the
    # missing-key and empty-list cases fall through to the warning below.
    if "Products" in data and data["Products"]:
        st.header("Search Results")
        tokenizer, model = load_t5_model()
        
        # NOTE(review): direct [...] indexing assumes every product carries
        # Description/Manufacturer/UnitPrice etc. — a missing field would
        # raise KeyError mid-render; confirm against the v4 API schema.
        for product in data["Products"]:
            st.subheader(product["Description"]["ProductDescription"])
            st.write(f"**Manufacturer**: {product['Manufacturer']['Name']}")
            st.write(f"**Product Number**: {product['ManufacturerProductNumber']}")
            st.write(f"**Unit Price**: ${product['UnitPrice']}")
            st.write(f"[Datasheet]({product['DatasheetUrl']})")
            st.write(f"[Product Link]({product['ProductUrl']})")

            # Check if PhotoUrl exists and is not None
            if product.get("PhotoUrl"):
                st.image(product["PhotoUrl"], width=200)
            else:
                st.write("_Image not available_")

            # Generate advice for the product
            # Runs a datasheet download + two model inferences per product,
            # so each result can take noticeable time to render.
            advice = generate_advice(product, tokenizer, model)
            st.write(f"**AI Advice:** {advice}")

            st.write("---")
    else:
        st.warning("No components found. Try using different keywords.")