Spaces:

ChatterjeeLab
/

SMILES2PEPTIDE

Running

App Files Files Community

yinuozhang committed on Nov 17, 2024

Commit

a953180

1 Parent(s): 418afab

class format

Browse files

Files changed (1) hide show

app.py +334 -253

app.py CHANGED Viewed

@@ -11,257 +11,294 @@ import matplotlib.pyplot as plt
 import matplotlib.patches as patches
 from io import BytesIO
-def is_peptide(smiles):
-    """Check if the SMILES represents a peptide by looking for peptide bonds"""
-    mol = Chem.MolFromSmiles(smiles)
-    if mol is None:
         return False
-    # Look for peptide bonds: NC(=O) pattern
-    peptide_bond_pattern = Chem.MolFromSmarts('[NH][C](=O)')
-    if mol.HasSubstructMatch(peptide_bond_pattern):
-        return True
-    # Look for N-methylated peptide bonds: N(C)C(=O) pattern
-    n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
-    if mol.HasSubstructMatch(n_methyl_pattern):
-        return True
-    # Look for ester bonds in cyclic depsipeptides: OC(=O) pattern
-    ester_bond_pattern = Chem.MolFromSmarts('O[C](=O)')
-    if mol.HasSubstructMatch(ester_bond_pattern):
-        return True
-    return False
-def remove_nested_branches(smiles):
-    """Remove nested branches from SMILES string"""
-    result = ''
-    depth = 0
-    for char in smiles:
-        if char == '(':
-            depth += 1
-        elif char == ')':
-            depth -= 1
-        elif depth == 0:
-            result += char
-    return result
-def identify_linkage_type(segment):
-    """
-    Identify the type of linkage between residues
-    Returns: tuple (type, is_n_methylated)
-    """
-    if 'OC(=O)' in segment:
-        return ('ester', False)
-    elif 'N(C)C(=O)' in segment:
-        return ('peptide', True)  # N-methylated peptide bond
-    elif 'NC(=O)' in segment:
-        return ('peptide', False)  # Regular peptide bond
-    return (None, False)
-def identify_residue(segment, next_segment=None, prev_segment=None):
-    """
-    Identify amino acid residues with modifications and special handling for both natural and unnatural AAs
-    Returns: tuple (residue, modifications)
-    """
-    modifications = []
-    # Check for N-methylation
-    if 'N(C)' in segment:  # Changed to look in current segment
-        modifications.append('N-Me')
-    if next_segment and 'OC(=O)' in next_segment:
-        modifications.append('O-linked')
-    # Check for Proline - but not if it's actually Cha
-    if any(pattern in segment for pattern in ['CCCN2', 'N2CCC', '[C@@H]2CCCN2', 'CCCN1', 'N1CCC']):
-        if not 'CCCCC' in segment:  # Make sure it's not Cha
-            return ('Pro', modifications)
-    # Check if this segment is part of a Proline ring by looking at context
-    if prev_segment and next_segment:
-        if ('CCC' in segment and 'N' in next_segment) or ('N' in segment and 'CCC' in prev_segment):
-            combined = prev_segment + segment + next_segment
-            if re.search(r'CCCN.*C\(=O\)', combined) and not 'CCCCC' in combined:
-                return ('Pro', modifications)
-    # Check for O-tBu modification FIRST
-    if 'COC(C)(C)C' in segment:
-        return ('O-tBu', modifications)  # or return ('Ser(O-tBu)', modifications) if you prefer
-    # Cyclohexyl amino acid (Cha)
-    if 'N2CCCCC2' in segment or 'CCCCC2' in segment:
-        return ('Cha', modifications)
-    # Aromatic amino acids
-    if 'Cc2ccccc2' in segment or 'c1ccccc1' in segment:
-        return ('Phe', modifications)
-    if 'c2ccc(O)cc2' in segment:
-        return ('Tyr', modifications)
-    if 'c1c[nH]c2ccccc12' in segment:
-        return ('Trp', modifications)
-    if 'c1cnc[nH]1' in segment:
-        return ('His', modifications)
-    # Branched chain amino acids
-    if 'CC(C)C[C@H]' in segment or 'CC(C)C[C@@H]' in segment:
-        return ('Leu', modifications)
-    if '[C@H](CC(C)C)' in segment or '[C@@H](CC(C)C)' in segment:
-        return ('Leu', modifications)
-    if 'C(C)C' in segment and not any(pat in segment for pat in ['CC(C)C', 'C(C)C[C@H]', 'C(C)C[C@@H]']):
-        return ('Val', modifications)
-    if 'C(C)C[C@H]' in segment or 'C(C)C[C@@H]' in segment:
-        return ('Ile', modifications)
-    # Small/polar amino acids - make Ala check more specific
-    if '[C@H](CO)' in segment:
-        return ('Ser', modifications)
-    if '[C@@H]([C@@H](C)O)' in segment or '[C@H]([C@H](C)O)' in segment:
-        return ('Thr', modifications)
-    if '[C@H]' in segment and not any(pat in segment for pat in ['C(C)', 'CC', 'O', 'N', 'S']):
-        return ('Gly', modifications)
-    if ('[C@@H](C)' in segment or '[C@H](C)' in segment) and \
-        not any(pat in segment for pat in ['O', 'CC(C)', 'COC']):
-         return ('Ala', modifications)
-    return (None, modifications)
-def parse_peptide(smiles):
-    """
-    Parse peptide sequence with better segment identification
-    """
-    # Split at each peptide bond C(=O)N
-    segments = []
-    bonds = list(re.finditer(r'C\(=O\)N(?:\(C\))?', smiles))
-    # Handle first residue (before first bond)
-    first_bond = bonds[0].start()
-    first_segment = smiles[0:first_bond]
-    segments.append(first_segment)
-    # Handle middle residues
-    for i in range(len(bonds)):
-        start = bonds[i].end()
-        end = bonds[i+1].start() if i < len(bonds)-1 else len(smiles)
-        segment = smiles[start:end]
-        is_n_me = 'N(C)' in bonds[i].group()
-        segments.append((segment, is_n_me))
-    sequence = []
-    # Handle first residue
-    residue, mods = identify_residue(segments[0])
-    if residue:
-        sequence.append(residue)
-    # Handle rest of residues
-    for segment, is_n_me in segments[1:]:
-        residue, mods = identify_residue(segment)
-        if is_n_me:
-            mods.append('N-Me')
-        if residue:
-            if mods:
-                sequence.append(f"{residue}({','.join(mods)})")
-            else:
-                sequence.append(residue)
-    print("\nDetailed Analysis:")
-    print("Segments:", segments)
-    print("Found sequence:", sequence)
-    if is_cyclic_peptide(smiles):
-        return f"cyclo({'-'.join(sequence)})"
-    return '-'.join(sequence)
-def is_cyclic_peptide(smiles):
-    """
-    Determine if SMILES represents a cyclic peptide by checking:
-    1. Proper cycle number pairing
-    2. Presence of peptide bonds between cycle points
-    3. Distinguishing between aromatic rings and peptide cycles
-    """
-    cycle_info = {}
-    # Find all cycle numbers and their contexts
-    for match in re.finditer(r'(\d)', smiles):
-        number = match.group(1)
-        position = match.start(1)
-        if number not in cycle_info:
-            cycle_info[number] = []
-        cycle_info[number].append({
-            'position': position,
-            'full_context': smiles[max(0, position-3):min(len(smiles), position+4)]
-        })
-    # Print cycle information for debugging
-    print("\nCycle Analysis:")
-    for num, occurrences in cycle_info.items():
-        print(f"Cycle number {num}:")
-        for occ in occurrences:
-            print(f"Position: {occ['position']}")
-            print(f"Context: {occ['full_context']}")
-    # Check each cycle
-    peptide_cycles = []
-    aromatic_cycles = []
-    for number, occurrences in cycle_info.items():
-        if len(occurrences) != 2:
-            continue
-        start, end = occurrences[0]['position'], occurrences[1]['position']
-        # Get wider context for cycle classification
-        segment = smiles[start:end+1]
-        # First check if this is clearly an aromatic ring (phenylalanine side chain)
-        full_context = smiles[max(0,start-10):min(len(smiles),end+10)]
-        is_aromatic = ('c2ccccc2' in full_context and len(segment) < 20) or ('c1ccccc1' in full_context and len(segment) < 20)
-        # Check for peptide bonds, including N-methylated ones
-        peptide_patterns = [
-            'C(=O)N',  # Regular peptide bond
-            'C(=O)N(C)',  # N-methylated peptide bond
-            'C(=O)N1',  # Cyclic peptide bond
-            'C(=O)N2'   # Cyclic peptide bond
-        ]
-        # A peptide cycle should have multiple C(=O)N patterns and be longer
-        has_peptide_bond = any(pattern in segment for pattern in peptide_patterns) and len(segment) > 20
-        if is_aromatic and len(segment) < 20:  # Aromatic rings are typically shorter segments
-            aromatic_cycles.append(number)
-        elif has_peptide_bond:
-            peptide_cycles.append(number)
-    print("\nFound cycles:")
-    print(f"Peptide cycles: {peptide_cycles}")
-    print(f"Aromatic cycles: {aromatic_cycles}")
-    return len(peptide_cycles) > 0
-def analyze_single_smiles(smiles):
-    """Analyze a single SMILES string"""
-    try:
-        is_cyclic, peptide_cycles, aromatic_cycles = is_cyclic_peptide(smiles)
-        sequence = parse_peptide(smiles)
-        details = {
-            #'SMILES': smiles,
-            'Sequence': sequence,
-            'Is Cyclic': 'Yes' if is_cyclic else 'No',
-            #'Peptide Cycles': ', '.join(peptide_cycles) if peptide_cycles else 'None',
-            #'Aromatic Cycles': ', '.join(aromatic_cycles) if aromatic_cycles else 'None'
-        }
-        return details
-    except Exception as e:
-        return {
-            #'SMILES': smiles,
-            'Sequence': f'Error: {str(e)}',
-            'Is Cyclic': 'Error',
-            #'Peptide Cycles': 'Error',
-            #'Aromatic Cycles': 'Error'
-        }
 """
 def annotate_cyclic_structure(mol, sequence):
     '''Create annotated 2D structure with clear, non-overlapping residue labels'''
@@ -529,16 +566,15 @@ def create_enhanced_linear_viz(sequence, smiles):
     return fig
 def process_input(smiles_input=None, file_obj=None, show_linear=False):
-    """Process input and create visualizations"""
-    results = []
-    images = []
     # Handle direct SMILES input
     if smiles_input:
         smiles = smiles_input.strip()
-        # First check if it's a peptide
-        if not is_peptide(smiles):
             return "Error: Input SMILES does not appear to be a peptide structure.", None, None
         try:
@@ -547,9 +583,32 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False):
             if mol is None:
                 return "Error: Invalid SMILES notation.", None, None
-            # Get sequence and cyclic information
-            sequence = parse_peptide(smiles)
-            is_cyclic, peptide_cycles, aromatic_cycles = is_cyclic_peptide(smiles)
             # Create cyclic structure visualization
             img_cyclic = annotate_cyclic_structure(mol, sequence)
@@ -558,19 +617,21 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False):
             img_linear = None
             if show_linear:
                 fig_linear = create_enhanced_linear_viz(sequence, smiles)
-                # Convert matplotlib figure to image
                 buf = BytesIO()
                 fig_linear.savefig(buf, format='png', bbox_inches='tight', dpi=300)
                 buf.seek(0)
                 img_linear = Image.open(buf)
                 plt.close(fig_linear)
-            # Format text output
-            output_text = f"Sequence: {sequence}\n"
-            output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
-            return output_text, img_cyclic, img_linear
         except Exception as e:
             return f"Error processing SMILES: {str(e)}", None, None
@@ -578,31 +639,51 @@ def process_input(smiles_input=None, file_obj=None, show_linear=False):
     # Handle file input
     if file_obj is not None:
         try:
-            # Handle file content based on file object type
-            if hasattr(file_obj, 'name'):  # If it's a file path
                 with open(file_obj.name, 'r') as f:
                     content = f.read()
-            else:  # If it's file content
                 content = file_obj.decode('utf-8') if isinstance(file_obj, bytes) else str(file_obj)
             output_text = ""
             for line in content.splitlines():
                 smiles = line.strip()
                 if smiles:
-                    if not is_peptide(smiles):
                         output_text += f"Skipping non-peptide SMILES: {smiles}\n"
                         continue
-                    result = analyze_single_smiles(smiles)
-                    output_text += f"Sequence: {result['Sequence']}\n"
-                    output_text += f"Is Cyclic: {result['Is Cyclic']}\n"
                     output_text += "-" * 50 + "\n"
             return output_text, None, None
         except Exception as e:
             return f"Error processing file: {str(e)}", None, None
     return "No input provided.", None, None
 # Create Gradio interface with simplified examples
 iface = gr.Interface(
     fn=process_input,

 import matplotlib.patches as patches
 from io import BytesIO
+import re
+from rdkit import Chem
class PeptideAnalyzer:
    """Heuristic, string-based peptide sequence reader for SMILES input.

    The SMILES text is cut at backbone linkage patterns (amide, N-methyl
    amide, ester) and each resulting fragment is matched against a fixed
    list of substring signatures to name the residue. Only `is_peptide`
    uses RDKit; every other method works on the raw SMILES text.
    """

    # A complete glycine unit; it is claimed before any bond pattern so the
    # amide inside it is not also recorded as a separate linkage.
    _GLY_PATTERN = r'NCC\(=O\)'

    # (regex, bond type) pairs, tried in this order; earlier entries claim
    # their characters first and later overlapping matches are discarded.
    _SPLIT_BOND_PATTERNS = (
        (r'OC\(=O\)', 'ester'),
        (r'N\(C\)C\(=O\)', 'n_methyl'),
        (r'N[12]C\(=O\)', 'peptide'),  # Pro peptide bonds
        (r'NC\(=O\)', 'peptide'),  # Regular peptide bonds
        (r'C\(=O\)N\(C\)', 'n_methyl'),
        (r'C\(=O\)N[12]?', 'peptide'),
    )

    def __init__(self):
        # Kept as a public attribute for existing callers; split_on_bonds
        # works from _SPLIT_BOND_PATTERNS and does not read this list.
        self.bond_patterns = [
            r'OC\(=O\)',  # ester bond
            r'N\(C\)C\(=O\)',  # N-methylated peptide bond
            r'N[12]?C\(=O\)',  # peptide bond (including Pro N1/N2)
            r'C\(=O\)N\(C\)',  # N-methylated peptide bond reverse
            r'C\(=O\)N'  # peptide bond reverse
        ]

    def is_peptide(self, smiles):
        """Return True if the SMILES parses and contains a backbone linkage.

        A linkage is an amide N-H next to a carbonyl, an N-substituted
        amide, or an ester. Returns False when RDKit cannot parse the input.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return False
        # Look for peptide bonds: NC(=O) pattern
        peptide_bond_pattern = Chem.MolFromSmarts('[NH][C](=O)')
        if mol.HasSubstructMatch(peptide_bond_pattern):
            return True
        # Look for N-methylated peptide bonds: N(C)C(=O) pattern
        n_methyl_pattern = Chem.MolFromSmarts('[N;H0;$(NC)](C)[C](=O)')
        if mol.HasSubstructMatch(n_methyl_pattern):
            return True
        # Look for ester bonds in cyclic depsipeptides: OC(=O) pattern
        ester_bond_pattern = Chem.MolFromSmarts('O[C](=O)')
        if mol.HasSubstructMatch(ester_bond_pattern):
            return True
        return False

    def is_cyclic(self, smiles):
        """Classify ring-closure digits as peptide macrocycles or aromatic rings.

        Every single digit character in the string is treated as a
        ring-closure label. Labels that do not occur exactly twice are
        ignored.

        Returns:
            (is_cyclic, peptide_cycles, aromatic_cycles) where the two lists
            hold the ring labels as one-character strings and `is_cyclic`
            is True when at least one peptide cycle was found.
        """
        # ring label -> string offsets at which that digit appears
        label_positions = {}
        for match in re.finditer(r'(\d)', smiles):
            label_positions.setdefault(match.group(1), []).append(match.start(1))

        peptide_cycles = []
        aromatic_cycles = []
        for number, offsets in label_positions.items():
            if len(offsets) != 2:
                continue
            start, end = offsets
            segment = smiles[start:end + 1]
            is_short = len(segment) < 20
            # Widen the window so the aromatic atom preceding the opening
            # digit is included when looking for a benzene ring.
            full_context = smiles[max(0, start - 10):min(len(smiles), end + 10)]
            is_aromatic = is_short and (
                'c2ccccc2' in full_context or 'c1ccccc1' in full_context
            )
            # 'C(=O)N' is a prefix of the N-methyl and ring-numbered amide
            # spellings, so this one test covers all of them.
            has_peptide_bond = 'C(=O)N' in segment and len(segment) > 20
            if is_aromatic:
                aromatic_cycles.append(number)
            elif has_peptide_bond:
                peptide_cycles.append(number)
        return len(peptide_cycles) > 0, peptide_cycles, aromatic_cycles

    def split_on_bonds(self, smiles):
        """Cut a SMILES string into residue fragments at backbone linkages.

        Returns a list of dicts in string order. Each dict has a 'content'
        key (the fragment text, or the literal 'NCC(=O)' for a glycine
        match) and, where applicable, 'bond_before' / 'bond_after' holding
        the text of the neighbouring matched pattern. An empty list is
        returned when no pattern matches at all.
        """
        positions = []
        used = set()

        def claim(match, kind):
            # Record the match unless any of its characters already belongs
            # to an earlier, higher-priority match.
            span = range(match.start(), match.end())
            if used.isdisjoint(span):
                positions.append({
                    'start': match.start(),
                    'end': match.end(),
                    'type': kind,
                    'pattern': match.group()
                })
                used.update(span)

        # Find Gly pattern first
        for match in re.finditer(self._GLY_PATTERN, smiles):
            claim(match, 'gly')
        # Then find all bonds, including N2C(=O)
        for pattern, bond_type in self._SPLIT_BOND_PATTERNS:
            for match in re.finditer(pattern, smiles):
                claim(match, bond_type)

        positions.sort(key=lambda x: x['start'])

        segments = []
        if not positions:
            return segments

        # Text before the first match
        if positions[0]['start'] > 0:
            segments.append({
                'content': smiles[0:positions[0]['start']],
                'bond_after': positions[0]['pattern']
            })

        last_index = len(positions) - 1
        for i, current in enumerate(positions):
            next_pos = positions[i + 1] if i < last_index else None

            if current['type'] == 'gly':
                # A glycine match is itself a residue, so it is emitted for
                # every position, including the final one.
                gly_segment = {
                    'content': 'NCC(=O)',
                    'bond_before': positions[i - 1]['pattern'] if i > 0 else None
                }
                if next_pos is not None:
                    gly_segment['bond_after'] = next_pos['pattern']
                segments.append(gly_segment)

            if next_pos is None:
                break

            # Text between this match and the next one, kept for glycine
            # matches as well so the residue that follows is not lost.
            content = smiles[current['end']:next_pos['start']]
            if content:
                segments.append({
                    'content': content,
                    'bond_before': current['pattern'],
                    'bond_after': next_pos['pattern']
                })

        # Text after the last match
        if positions[-1]['end'] < len(smiles):
            segments.append({
                'content': smiles[positions[-1]['end']:],
                'bond_before': positions[-1]['pattern']
            })
        return segments

    def identify_residue(self, segment):
        """Name the residue in one segment produced by `split_on_bonds`.

        Checks run in a fixed order and the first matching substring
        signature wins, so the order below is significant.

        Returns:
            (residue, mods): `residue` is a short residue label or None when
            nothing matched; `mods` is the list from `get_modifications`.
        """
        content = segment['content']
        mods = self.get_modifications(segment)
        # Special handling for Pro: the ring is split across the bond, so
        # the ring-closure atom must also be present in this fragment.
        if (segment.get('bond_after') == 'N2C(=O)' and 'CCC' in content) or \
                ('CCCN2' in content and content.endswith('=O')):  # End case
            if '[C@@H]2' in content or '[C@H]2' in content:
                return 'Pro', mods
        # Norleucine (Nle)
        if ('C[C@H](CCCC)' in content or 'C[C@@H](CCCC)' in content) and 'CC(C)' not in content:
            return 'Nle', mods
        # Ornithine (Orn) - 3-carbon chain with NH2
        if ('C[C@H](CCCN)' in content or 'C[C@@H](CCCN)' in content) and 'CC(C)' not in content:
            return 'Orn', mods
        # 2-Naphthylalanine (2Nal) - distinct from Phe pattern
        if ('Cc3cc2ccccc2c3' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
            return '2Nal', mods
        # Cyclohexylalanine (Cha)
        if 'N2CCCCC2' in content or 'CCCCC2' in content:
            return 'Cha', mods
        # Aminobutyric acid (Abu) - 2-carbon chain
        if ('C[C@H](CC)' in content or 'C[C@@H](CC)' in content) and not any(p in content for p in ['CC(C)', 'CCCC', 'CCC(C)']):
            return 'Abu', mods
        # Pipecolic acid (Pip) - 6-membered ring like Pro
        if ('N3CCCCC3' in content or 'CCCCC3' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
            return 'Pip', mods
        # Cyclohexylglycine (Chg) - direct cyclohexyl without CH2
        if ('C[C@H](C1CCCCC1)' in content or 'C[C@@H](C1CCCCC1)' in content):
            return 'Chg', mods
        # 4-Fluorophenylalanine (4F-Phe)
        if ('Cc2ccc(F)cc2' in content) and ('C[C@H]' in content or 'C[C@@H]' in content):
            return '4F-Phe', mods
        # Regular residue identification
        if 'NCC(=O)' in content:
            return 'Gly', mods
        if 'CC(C)C[C@H]' in content or 'CC(C)C[C@@H]' in content:
            return 'Leu', mods
        if '[C@@H](CC(C)C)' in content or '[C@H](CC(C)C)' in content:
            return 'Leu', mods
        if ('C(C)C[C@H]' in content or 'C(C)C[C@@H]' in content) and 'CC(C)C' not in content:
            return 'Ile', mods
        if '[C@@H]([C@@H](C)O)' in content or '[C@H]([C@H](C)O)' in content:
            return 'Thr', mods
        if '[C@H](Cc2ccccc2)' in content or '[C@@H](Cc2ccccc2)' in content:
            return 'Phe', mods
        if '[C@H](C(C)C)' in content or '[C@@H](C(C)C)' in content:
            if not any(p in content for p in ['CC(C)C[C@H]', 'CC(C)C[C@@H]']):
                return 'Val', mods
        if '[C@H](COC(C)(C)C)' in content or '[C@@H](COC(C)(C)C)' in content:
            return 'O-tBu', mods
        if ('[C@H](C)' in content or '[C@@H](C)' in content):
            if not any(p in content for p in ['C(C)C', 'COC', 'CN(', 'C(C)O']):
                return 'Ala', mods
        return None, mods

    def get_modifications(self, segment):
        """Derive modification tags from the bond that follows a segment.

        Returns a list that may contain 'N-Me' (the following bond text
        contains 'N(C)') and/or 'O-linked' (it contains 'OC(=O)'). Segments
        without a 'bond_after' entry yield an empty list.
        """
        mods = []
        bond_after = segment.get('bond_after')
        if bond_after:
            if 'N(C)' in bond_after:
                mods.append('N-Me')
            if 'OC(=O)' in bond_after:
                mods.append('O-linked')
        return mods

    def analyze_structure(self, smiles):
        """Split, identify and join residues into a sequence string.

        Prints a per-segment trace to stdout. Unidentified segments are
        reported in the trace and left out of the sequence.

        Returns:
            Residue labels joined with '-', wrapped in 'cyclo(...)' when
            `is_cyclic` reports a peptide macrocycle.
        """
        print("\nAnalyzing structure:", smiles)
        segments = self.split_on_bonds(smiles)
        print("\nSegment Analysis:")
        sequence = []
        for i, segment in enumerate(segments):
            print(f"\nSegment {i}:")
            print(f"Content: {segment['content']}")
            print(f"Bond before: {segment.get('bond_before', 'None')}")
            print(f"Bond after: {segment.get('bond_after', 'None')}")
            residue, mods = self.identify_residue(segment)
            if residue:
                if mods:
                    sequence.append(f"{residue}({','.join(mods)})")
                else:
                    sequence.append(residue)
                print(f"Identified as: {residue}")
                print(f"Modifications: {mods}")
            else:
                print(f"Warning: Could not identify residue in segment: {segment['content']}")
        # Use the ring classifier rather than a bare 'N1'/'N2' substring
        # test, which would also fire on a proline ring in a linear peptide.
        is_cyclic, _peptide_cycles, _aromatic_cycles = self.is_cyclic(smiles)
        joined = '-'.join(sequence)
        final_sequence = f"cyclo({joined})" if is_cyclic else joined
        print(f"\nFinal sequence: {final_sequence}")
        return final_sequence
 """
 def annotate_cyclic_structure(mol, sequence):
     '''Create annotated 2D structure with clear, non-overlapping residue labels'''
     return fig
 def process_input(smiles_input=None, file_obj=None, show_linear=False):
+    """Process input and create visualizations using PeptideAnalyzer"""
+    analyzer = PeptideAnalyzer()
     # Handle direct SMILES input
     if smiles_input:
         smiles = smiles_input.strip()
+        # First check if it's a peptide using analyzer's method
+        if not analyzer.is_peptide(smiles):
             return "Error: Input SMILES does not appear to be a peptide structure.", None, None
         try:
             if mol is None:
                 return "Error: Invalid SMILES notation.", None, None
+            # Use analyzer to get sequence
+            segments = analyzer.split_on_bonds(smiles)
+            # Process segments and build sequence
+            sequence_parts = []
+            output_text = "Segment Analysis:\n"
+            for i, segment in enumerate(segments):
+                output_text += f"\nSegment {i}:\n"
+                output_text += f"Content: {segment['content']}\n"
+                output_text += f"Bond before: {segment.get('bond_before', 'None')}\n"
+                output_text += f"Bond after: {segment.get('bond_after', 'None')}\n"
+                residue, mods = analyzer.identify_residue(segment)
+                if residue:
+                    if mods:
+                        sequence_parts.append(f"{residue}({','.join(mods)})")
+                    else:
+                        sequence_parts.append(residue)
+                    output_text += f"Identified as: {residue}\n"
+                    output_text += f"Modifications: {mods}\n"
+                else:
+                    output_text += f"Warning: Could not identify residue in segment: {segment['content']}\n"
+            # Check if cyclic using analyzer's method
+            is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
+            sequence = f"cyclo({'-'.join(sequence_parts)})" if is_cyclic else '-'.join(sequence_parts)
             # Create cyclic structure visualization
             img_cyclic = annotate_cyclic_structure(mol, sequence)
             img_linear = None
             if show_linear:
                 fig_linear = create_enhanced_linear_viz(sequence, smiles)
                 buf = BytesIO()
                 fig_linear.savefig(buf, format='png', bbox_inches='tight', dpi=300)
                 buf.seek(0)
                 img_linear = Image.open(buf)
                 plt.close(fig_linear)
+            # Add summary to output
+            summary = f"\nSummary:\n"
+            summary += f"Sequence: {sequence}\n"
+            summary += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
+            if is_cyclic:
+                summary += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
+                summary += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
+            return summary + "\n" + output_text, img_cyclic, img_linear
         except Exception as e:
             return f"Error processing SMILES: {str(e)}", None, None
     # Handle file input
     if file_obj is not None:
         try:
+            # Handle file content
+            if hasattr(file_obj, 'name'):
                 with open(file_obj.name, 'r') as f:
                     content = f.read()
+            else:
                 content = file_obj.decode('utf-8') if isinstance(file_obj, bytes) else str(file_obj)
             output_text = ""
             for line in content.splitlines():
                 smiles = line.strip()
                 if smiles:
+                    # Check if it's a peptide
+                    if not analyzer.is_peptide(smiles):
                         output_text += f"Skipping non-peptide SMILES: {smiles}\n"
                         continue
+                    # Process this SMILES
+                    segments = analyzer.split_on_bonds(smiles)
+                    sequence_parts = []
+                    for segment in segments:
+                        residue, mods = analyzer.identify_residue(segment)
+                        if residue:
+                            if mods:
+                                sequence_parts.append(f"{residue}({','.join(mods)})")
+                            else:
+                                sequence_parts.append(residue)
+                    # Get cyclicity and create sequence
+                    is_cyclic, peptide_cycles, aromatic_cycles = analyzer.is_cyclic(smiles)
+                    sequence = f"cyclo({'-'.join(sequence_parts)})" if is_cyclic else '-'.join(sequence_parts)
+                    output_text += f"SMILES: {smiles}\n"
+                    output_text += f"Sequence: {sequence}\n"
+                    output_text += f"Is Cyclic: {'Yes' if is_cyclic else 'No'}\n"
+                    if is_cyclic:
+                        output_text += f"Peptide Cycles: {', '.join(peptide_cycles)}\n"
+                        output_text += f"Aromatic Cycles: {', '.join(aromatic_cycles)}\n"
                     output_text += "-" * 50 + "\n"
             return output_text, None, None
         except Exception as e:
             return f"Error processing file: {str(e)}", None, None
     return "No input provided.", None, None
 # Create Gradio interface with simplified examples
 iface = gr.Interface(
     fn=process_input,