Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -92,13 +92,15 @@ class ProteinAnalyzer:
|
|
| 92 |
return ''.join(protein)
|
| 93 |
|
| 94 |
@staticmethod
|
| 95 |
-
def analyze_protein_with_llm(protein_sequence: str, cell_type: str) -> str:
|
| 96 |
"""Analyze protein structure and function using Friendli LLM API"""
|
| 97 |
|
| 98 |
# Get API token from environment
|
| 99 |
token = os.getenv("FRIENDLI_TOKEN")
|
| 100 |
if not token:
|
| 101 |
logger.warning("FRIENDLI_TOKEN not found in environment variables")
|
|
|
|
|
|
|
| 102 |
return "Protein analysis unavailable: API token not configured"
|
| 103 |
|
| 104 |
try:
|
|
@@ -108,8 +110,23 @@ class ProteinAnalyzer:
|
|
| 108 |
"Content-Type": "application/json"
|
| 109 |
}
|
| 110 |
|
| 111 |
-
# Create prompt for protein analysis
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
Protein sequence: {protein_sequence}
|
| 115 |
Cell type context: {cell_type}
|
|
@@ -128,7 +145,7 @@ Keep the response concise but informative, suitable for display in a scientific
|
|
| 128 |
"messages": [
|
| 129 |
{
|
| 130 |
"role": "system",
|
| 131 |
-
"content": "You are a knowledgeable bioinformatics assistant specializing in protein structure and function prediction."
|
| 132 |
},
|
| 133 |
{
|
| 134 |
"role": "user",
|
|
@@ -214,10 +231,10 @@ class DNADiffusionApp:
|
|
| 214 |
logger.error(f"Generation failed: {e}")
|
| 215 |
raise
|
| 216 |
|
| 217 |
-
def handle_generation_request(self, cell_type: str, guidance_scale: float):
|
| 218 |
"""Handle sequence generation request from Gradio"""
|
| 219 |
try:
|
| 220 |
-
logger.info(f"Generating sequence for cell type: {cell_type}")
|
| 221 |
|
| 222 |
# Generate DNA sequence
|
| 223 |
sequence, metadata = self.generate_sequence(cell_type, guidance_scale)
|
|
@@ -233,7 +250,7 @@ class DNADiffusionApp:
|
|
| 233 |
# Analyze protein with LLM
|
| 234 |
logger.info("Analyzing protein structure and function...")
|
| 235 |
protein_analysis = self.protein_analyzer.analyze_protein_with_llm(
|
| 236 |
-
protein_sequence, cell_type
|
| 237 |
)
|
| 238 |
|
| 239 |
# Add analysis to metadata
|
|
@@ -277,6 +294,7 @@ def create_demo():
|
|
| 277 |
|
| 278 |
if (event.data.type === 'generate_request') {
|
| 279 |
console.log('Triggering generation for cell type:', event.data.cellType);
|
|
|
|
| 280 |
|
| 281 |
// Update the hidden cell type input
|
| 282 |
const radioInputs = document.querySelectorAll('#cell-type-input input[type="radio"]');
|
|
@@ -288,6 +306,15 @@ def create_demo():
|
|
| 288 |
}
|
| 289 |
});
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
// Small delay to ensure radio button update is processed
|
| 292 |
setTimeout(() => {
|
| 293 |
document.querySelector('#generate-btn').click();
|
|
@@ -340,6 +367,12 @@ def create_demo():
|
|
| 340 |
label="Cell Type",
|
| 341 |
elem_id="cell-type-input"
|
| 342 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
guidance_input = gr.Slider(
|
| 344 |
minimum=1.0,
|
| 345 |
maximum=10.0,
|
|
@@ -366,7 +399,7 @@ def create_demo():
|
|
| 366 |
# Wire up the generation
|
| 367 |
generate_btn.click(
|
| 368 |
fn=app.handle_generation_request,
|
| 369 |
-
inputs=[cell_type_input, guidance_input],
|
| 370 |
outputs=[sequence_output, metadata_output]
|
| 371 |
).then(
|
| 372 |
fn=None,
|
|
|
|
| 92 |
return ''.join(protein)
|
| 93 |
|
| 94 |
@staticmethod
|
| 95 |
+
def analyze_protein_with_llm(protein_sequence: str, cell_type: str, language: str = "en") -> str:
|
| 96 |
"""Analyze protein structure and function using Friendli LLM API"""
|
| 97 |
|
| 98 |
# Get API token from environment
|
| 99 |
token = os.getenv("FRIENDLI_TOKEN")
|
| 100 |
if not token:
|
| 101 |
logger.warning("FRIENDLI_TOKEN not found in environment variables")
|
| 102 |
+
if language == "ko":
|
| 103 |
+
return "단백질 분석 불가: API 토큰이 설정되지 않았습니다"
|
| 104 |
return "Protein analysis unavailable: API token not configured"
|
| 105 |
|
| 106 |
try:
|
|
|
|
| 110 |
"Content-Type": "application/json"
|
| 111 |
}
|
| 112 |
|
| 113 |
+
# Create prompt for protein analysis based on language
|
| 114 |
+
if language == "ko":
|
| 115 |
+
prompt = f"""당신은 생물정보학 전문가입니다. 다음 단백질 서열을 분석하고 잠재적인 구조와 기능에 대한 통찰력을 제공해주세요.
|
| 116 |
+
|
| 117 |
+
단백질 서열: {protein_sequence}
|
| 118 |
+
세포 유형: {cell_type}
|
| 119 |
+
|
| 120 |
+
다음 내용을 포함해주세요:
|
| 121 |
+
1. 서열 패턴을 기반으로 예측되는 단백질 패밀리 또는 도메인
|
| 122 |
+
2. 잠재적인 구조적 특징 (알파 나선, 베타 시트, 루프)
|
| 123 |
+
3. 가능한 생물학적 기능
|
| 124 |
+
4. {cell_type} 세포 유형과의 관련성
|
| 125 |
+
5. 주목할 만한 서열 모티프나 특성
|
| 126 |
+
|
| 127 |
+
과학 애플리케이션에 표시하기에 적합하도록 간결하면서도 유익한 응답을 작성해주세요."""
|
| 128 |
+
else:
|
| 129 |
+
prompt = f"""You are a bioinformatics expert. Analyze the following protein sequence and provide insights about its potential structure and function.
|
| 130 |
|
| 131 |
Protein sequence: {protein_sequence}
|
| 132 |
Cell type context: {cell_type}
|
|
|
|
| 145 |
"messages": [
|
| 146 |
{
|
| 147 |
"role": "system",
|
| 148 |
+
"content": "You are a knowledgeable bioinformatics assistant specializing in protein structure and function prediction." if language == "en" else "당신은 단백질 구조와 기능 예측을 전문으로 하는 지식이 풍부한 생물정보학 어시스턴트입니다."
|
| 149 |
},
|
| 150 |
{
|
| 151 |
"role": "user",
|
|
|
|
| 231 |
logger.error(f"Generation failed: {e}")
|
| 232 |
raise
|
| 233 |
|
| 234 |
+
def handle_generation_request(self, cell_type: str, guidance_scale: float, language: str = "en"):
|
| 235 |
"""Handle sequence generation request from Gradio"""
|
| 236 |
try:
|
| 237 |
+
logger.info(f"Generating sequence for cell type: {cell_type}, language: {language}")
|
| 238 |
|
| 239 |
# Generate DNA sequence
|
| 240 |
sequence, metadata = self.generate_sequence(cell_type, guidance_scale)
|
|
|
|
| 250 |
# Analyze protein with LLM
|
| 251 |
logger.info("Analyzing protein structure and function...")
|
| 252 |
protein_analysis = self.protein_analyzer.analyze_protein_with_llm(
|
| 253 |
+
protein_sequence, cell_type, language
|
| 254 |
)
|
| 255 |
|
| 256 |
# Add analysis to metadata
|
|
|
|
| 294 |
|
| 295 |
if (event.data.type === 'generate_request') {
|
| 296 |
console.log('Triggering generation for cell type:', event.data.cellType);
|
| 297 |
+
console.log('Language:', event.data.language);
|
| 298 |
|
| 299 |
// Update the hidden cell type input
|
| 300 |
const radioInputs = document.querySelectorAll('#cell-type-input input[type="radio"]');
|
|
|
|
| 306 |
}
|
| 307 |
});
|
| 308 |
|
| 309 |
+
// Update the language input
|
| 310 |
+
const langInputs = document.querySelectorAll('#language-input input[type="radio"]');
|
| 311 |
+
langInputs.forEach(input => {
|
| 312 |
+
if (input.value === event.data.language) {
|
| 313 |
+
input.checked = true;
|
| 314 |
+
input.dispatchEvent(new Event('change'));
|
| 315 |
+
}
|
| 316 |
+
});
|
| 317 |
+
|
| 318 |
// Small delay to ensure radio button update is processed
|
| 319 |
setTimeout(() => {
|
| 320 |
document.querySelector('#generate-btn').click();
|
|
|
|
| 367 |
label="Cell Type",
|
| 368 |
elem_id="cell-type-input"
|
| 369 |
)
|
| 370 |
+
language_input = gr.Radio(
|
| 371 |
+
["en", "ko"],
|
| 372 |
+
value="en",
|
| 373 |
+
label="Language",
|
| 374 |
+
elem_id="language-input"
|
| 375 |
+
)
|
| 376 |
guidance_input = gr.Slider(
|
| 377 |
minimum=1.0,
|
| 378 |
maximum=10.0,
|
|
|
|
| 399 |
# Wire up the generation
|
| 400 |
generate_btn.click(
|
| 401 |
fn=app.handle_generation_request,
|
| 402 |
+
inputs=[cell_type_input, guidance_input, language_input],
|
| 403 |
outputs=[sequence_output, metadata_output]
|
| 404 |
).then(
|
| 405 |
fn=None,
|