Spaces:

Jayesh13
/

HI_SCBL

Runtime error

App Files Files Community

Jayesh13 committed on Oct 11, 2024

Commit

7d03065

verified ·

1 Parent(s): ef8ec9c

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -20

app.py CHANGED Viewed

@@ -54,32 +54,31 @@ def process_csv(file):
     return homorepeats, sequence_data
-# Function to generate and download Excel workbook with file names as separators
 def create_excel(sequences_data, homorepeats, filenames):
     output = BytesIO()
     workbook = xlsxwriter.Workbook(output, {'in_memory': True})
-    worksheet = workbook.add_worksheet()
-    row = 0
-    # Iterate through sequences data grouped by filenames
     for file_index, file_data in enumerate(sequences_data):
         filename = filenames[file_index]
-        # Write filename as a separator row
-        worksheet.write(row, 0, f"File: {filename}")
-        row += 1
         # Write the header for the current file
-        worksheet.write(row, 0, "Entry ID")
-        worksheet.write(row, 1, "Protein Name")
         col = 2
         for repeat in sorted(homorepeats):
-            worksheet.write(row, col, repeat)
             col += 1
-        row += 1
         # Write data for each sequence in the current file
         for entry_id, protein_name, freq in file_data:
             worksheet.write(row, 0, entry_id)
             worksheet.write(row, 1, protein_name)
@@ -89,9 +88,6 @@ def create_excel(sequences_data, homorepeats, filenames):
                 col += 1
             row += 1
-        # Add an empty row as a separator between files
-        row += 1
     workbook.close()
     output.seek(0)
     return output
@@ -99,8 +95,8 @@ def create_excel(sequences_data, homorepeats, filenames):
 # Streamlit UI components
 st.title("Protein Homorepeat Analysis")
-# Step 1: Upload CSV Files
-uploaded_files = st.file_uploader("Upload CSV files", accept_multiple_files=True, type=["csv"])
 # Step 2: Process files and display results
 if uploaded_files:
@@ -109,7 +105,8 @@ if uploaded_files:
     filenames = []
     for file in uploaded_files:
-        homorepeats, sequence_data = process_csv(file)
         if homorepeats is not None:
             all_homorepeats.update(homorepeats)
             all_sequences_data.append(sequence_data)
@@ -141,4 +138,21 @@ if uploaded_files:
                     rows.append(row)
             result_df = pd.DataFrame(rows)
-            st.dataframe(result_df)

     return homorepeats, sequence_data
+import pandas as pd
+import streamlit as st
+from io import BytesIO
+import xlsxwriter
+# Function to generate and download Excel workbook with separate sheets for each input file
 def create_excel(sequences_data, homorepeats, filenames):
     output = BytesIO()
     workbook = xlsxwriter.Workbook(output, {'in_memory': True})
+    # Iterate through sequences data grouped by filenames and create separate sheets
     for file_index, file_data in enumerate(sequences_data):
         filename = filenames[file_index]
+        worksheet = workbook.add_worksheet(filename[:31])  # Limit sheet name to 31 characters
         # Write the header for the current file
+        worksheet.write(0, 0, "Entry ID")
+        worksheet.write(0, 1, "Protein Name")
         col = 2
         for repeat in sorted(homorepeats):
+            worksheet.write(0, col, repeat)
             col += 1
         # Write data for each sequence in the current file
+        row = 1
         for entry_id, protein_name, freq in file_data:
             worksheet.write(row, 0, entry_id)
             worksheet.write(row, 1, protein_name)
                 col += 1
             row += 1
     workbook.close()
     output.seek(0)
     return output
 # Streamlit UI components
 st.title("Protein Homorepeat Analysis")
+# Step 1: Upload Excel Files
+uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
 # Step 2: Process files and display results
 if uploaded_files:
     filenames = []
     for file in uploaded_files:
+        excel_data = pd.ExcelFile(file)
+        homorepeats, sequence_data = process_excel(excel_data)  # Modify your process_csv function to process_excel
         if homorepeats is not None:
             all_homorepeats.update(homorepeats)
             all_sequences_data.append(sequence_data)
                     rows.append(row)
             result_df = pd.DataFrame(rows)
+            st.dataframe(result_df)
+# Collect per-protein homorepeat values from every sheet of an uploaded workbook (the upload loop passes a pd.ExcelFile as excel_data)
+def process_excel(excel_data):  # NOTE(review): listed after the module-level call to process_excel; confirm app.py defines it before that call runs, otherwise the call raises NameError
+    # Accumulators shared across all sheets: repeat column names seen so far, and one tuple per data row
+    homorepeats = set()
+    sequence_data = []
+    for sheet_name in excel_data.sheet_names:
+        df = excel_data.parse(sheet_name)  # load this sheet as a table
+        for index, row in df.iterrows():  # index is not used below
+            entry_id = row['Entry ID']  # fails with KeyError if the sheet has no 'Entry ID' column
+            protein_name = row['Protein Name']
+            freq = {repeat: row[repeat] for repeat in df.columns[2:]}  # every column from the 3rd onward is treated as a repeat; assumes 'Entry ID' and 'Protein Name' are the first two columns — TODO confirm
+            sequence_data.append((entry_id, protein_name, freq))
+            homorepeats.update(freq.keys())  # runs once per row, so a sheet with no rows contributes no repeat names
+    return homorepeats, sequence_data  # (set of repeat column names, list of (entry_id, protein_name, freq) tuples)