Nawal20 committed on
Commit
53e9a7d
·
verified ·
1 Parent(s): 5b0a5c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -71
app.py CHANGED
@@ -24,100 +24,58 @@ import matplotlib.pyplot as plt
24
  import plotly.express as px
25
  import requests
26
 
27
- # Ensure required libraries are installed
28
- try:
29
- import torch
30
- except ImportError:
31
- subprocess.check_call(["pip", "install", "torch"])
32
- try:
33
- import pdfplumber
34
- except ImportError:
35
- subprocess.check_call(["pip", "install", "pdfplumber"])
36
- try:
37
- import plotly
38
- except ImportError:
39
- subprocess.check_call(["pip", "install", "plotly"])
40
 
41
  # NLP Model for summarization
42
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
43
 
 
 
 
 
 
 
 
44
  # Title and Description
45
  st.title("Automated Datasheet Summarizer")
46
  st.write("Upload a datasheet PDF or enter a component name to get simplified summaries, key specs, and visual insights.")
47
 
48
- # Error Handling for Unsupported Files
49
- def validate_pdf(file):
50
- try:
51
- with pdfplumber.open(file) as pdf:
52
- return pdf.pages[0].extract_text() is not None
53
- except Exception as e:
54
- return False
55
-
56
- # Function to fetch datasheet from an online database
57
- def fetch_datasheet(component_name):
58
- try:
59
- url = f"https://api.example.com/datasheets/{component_name}" # Placeholder API
60
- response = requests.get(url)
61
- response.raise_for_status()
62
- return response.content # Assuming API returns a PDF file
63
- except requests.exceptions.RequestException as e:
64
- st.error(f"Error fetching datasheet: {e}")
65
- return None
66
-
67
  # Input Options
68
  input_type = st.radio("Select Input Type:", ["Upload PDF", "Enter Component Name"])
69
 
70
  if input_type == "Upload PDF":
71
  uploaded_file = st.file_uploader("Upload a Datasheet PDF", type=["pdf"])
72
  if uploaded_file is not None:
73
- if validate_pdf(uploaded_file):
74
  with pdfplumber.open(uploaded_file) as pdf:
75
  text = "".join([page.extract_text() for page in pdf.pages])
76
- st.subheader("Extracted Text")
77
- st.text_area("Datasheet Text", value=text[:5000], height=300) # Show only first 5000 characters
78
 
 
 
 
79
  if st.button("Summarize PDF"):
80
- # Summarize the text
81
- summary = summarizer(text[:1024], max_length=300, min_length=50, do_sample=False)[0]["summary_text"]
82
- st.subheader("Simplified Summary")
83
- st.write(summary)
84
- else:
85
- st.error("The uploaded file is either unsupported or does not contain readable text.")
 
 
 
 
 
 
 
86
 
87
  elif input_type == "Enter Component Name":
88
  component_name = st.text_input("Enter Component Name")
89
  if component_name and st.button("Search and Summarize"):
90
- datasheet_pdf = fetch_datasheet(component_name)
91
- if datasheet_pdf:
92
- with open("temp_datasheet.pdf", "wb") as f:
93
- f.write(datasheet_pdf)
94
- with pdfplumber.open("temp_datasheet.pdf") as pdf:
95
- text = "".join([page.extract_text() for page in pdf.pages])
96
- summary = summarizer(text[:1024], max_length=300, min_length=50, do_sample=False)[0]["summary_text"]
97
- st.subheader("Simplified Summary")
98
- st.write(summary)
99
-
100
-
101
- if st.button("Summarize PDF"):
102
- # Clean and truncate text
103
- input_text = clean_text(text[:1024])
104
- st.text_area("Summarization Input", value=input_text, height=200)
105
-
106
- if len(input_text.strip()) == 0:
107
- st.error("No valid text extracted for summarization.")
108
- else:
109
- try:
110
- # Summarize the text
111
- summary = summarizer(input_text, max_length=300, min_length=50, do_sample=False)[0]["summary_text"]
112
- st.subheader("Simplified Summary")
113
- st.write(summary)
114
- except Exception as e:
115
- st.error(f"Error during summarization: {e}")
116
 
117
- # Dynamic Table Parsing
118
- if input_type == "Upload PDF" and uploaded_file is not None and validate_pdf(uploaded_file):
119
  if st.button("Generate Key Specifications Table"):
120
- # Mock parsing logic for demonstration
121
  parsed_data = {
122
  "Parameter": ["Voltage", "Current", "Power", "Efficiency"],
123
  "Value": ["3.3V", "2A", "6.6W", "85%"],
@@ -126,7 +84,7 @@ if input_type == "Upload PDF" and uploaded_file is not None and validate_pdf(upl
126
  st.subheader("Key Specifications")
127
  st.table(df)
128
 
129
- # Enhanced Visualization with Plotly
130
  st.subheader("Interactive Key Parameters Graph")
131
  fig = px.bar(df, x="Parameter", y="Value", title="Key Specifications", text="Value")
132
  st.plotly_chart(fig)
 
24
  import plotly.express as px
25
  import requests
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # NLP Model for summarization
29
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
30
 
31
+ # Helper function to clean text
32
+ def clean_text(text):
33
+ """Clean extracted text by removing non-ASCII characters and extra whitespace."""
34
+ text = text.encode("ascii", "ignore").decode()
35
+ text = re.sub(r"\s+", " ", text)
36
+ return text.strip()
37
+
38
  # Title and Description
39
  st.title("Automated Datasheet Summarizer")
40
  st.write("Upload a datasheet PDF or enter a component name to get simplified summaries, key specs, and visual insights.")
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # Input Options
43
  input_type = st.radio("Select Input Type:", ["Upload PDF", "Enter Component Name"])
44
 
45
  if input_type == "Upload PDF":
46
  uploaded_file = st.file_uploader("Upload a Datasheet PDF", type=["pdf"])
47
  if uploaded_file is not None:
48
+ try:
49
  with pdfplumber.open(uploaded_file) as pdf:
50
  text = "".join([page.extract_text() for page in pdf.pages])
 
 
51
 
52
+ cleaned_text = clean_text(text[:1024]) # Clean and truncate text
53
+ st.text_area("Extracted Text (Preview)", value=cleaned_text, height=300)
54
+
55
  if st.button("Summarize PDF"):
56
+ if len(cleaned_text) == 0:
57
+ st.error("No valid text extracted for summarization.")
58
+ else:
59
+ try:
60
+ # Summarize the text
61
+ summary = summarizer(cleaned_text, max_length=300, min_length=50, do_sample=False)[0]["summary_text"]
62
+ st.subheader("Simplified Summary")
63
+ st.write(summary)
64
+ except Exception as e:
65
+ st.error(f"Error during summarization: {e}")
66
+
67
+ except Exception as e:
68
+ st.error(f"Error processing the PDF: {e}")
69
 
70
  elif input_type == "Enter Component Name":
71
  component_name = st.text_input("Enter Component Name")
72
  if component_name and st.button("Search and Summarize"):
73
+ st.error("Component search functionality is under development.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ # Dynamic Table Parsing (Example Data)
76
+ if input_type == "Upload PDF" and uploaded_file is not None:
77
  if st.button("Generate Key Specifications Table"):
78
+ # Mock parsing logic
79
  parsed_data = {
80
  "Parameter": ["Voltage", "Current", "Power", "Efficiency"],
81
  "Value": ["3.3V", "2A", "6.6W", "85%"],
 
84
  st.subheader("Key Specifications")
85
  st.table(df)
86
 
87
+ # Enhanced Visualization
88
  st.subheader("Interactive Key Parameters Graph")
89
  fig = px.bar(df, x="Parameter", y="Value", title="Key Specifications", text="Value")
90
  st.plotly_chart(fig)