Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ import pandas as pd
|
|
| 18 |
import PyPDF2 # For handling PDF files
|
| 19 |
from collections import Counter
|
| 20 |
|
| 21 |
-
from openai import OpenAI, APIError, APITimeoutError
|
| 22 |
from gradio_client import Client
|
| 23 |
from kaggle.api.kaggle_api_extended import KaggleApi
|
| 24 |
import tempfile
|
|
@@ -30,10 +30,32 @@ import pyarrow.parquet as pq
|
|
| 30 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 31 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 32 |
|
| 33 |
-
# βββ
|
| 34 |
import httpx
|
| 35 |
from httpx import RemoteProtocolError
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# βββββββββββββββββββββββββββββββ Environment Variables / Constants βββββββββββββββββββββββββ
|
| 39 |
|
|
@@ -49,12 +71,9 @@ if not (KAGGLE_USERNAME and KAGGLE_KEY):
|
|
| 49 |
os.environ["KAGGLE_USERNAME"] = KAGGLE_USERNAME
|
| 50 |
os.environ["KAGGLE_KEY"] = KAGGLE_KEY
|
| 51 |
|
| 52 |
-
# βΆ μΌλΆ νλ‘μμμ HTTP/2 β 1.1 κ°μ μ νμ΄ νμν λ νμ±ν
|
| 53 |
-
# os.environ["HTTPX_FORCE_HTTP1"] = "true"
|
| 54 |
-
|
| 55 |
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
|
| 56 |
IMAGE_API_URL = "http://211.233.58.201:7896" # μμ μ΄λ―Έμ§ μμ±μ© API
|
| 57 |
-
MAX_TOKENS =
|
| 58 |
|
| 59 |
# βββββββββββββββββββββββββββββββ Logging βββββββββββββββββββββββββββββββ
|
| 60 |
logging.basicConfig(
|
|
@@ -2038,21 +2057,22 @@ def process_example(topic):
|
|
| 2038 |
def process_input(prompt: str, uploaded_files):
|
| 2039 |
"""
|
| 2040 |
λ©μΈ μ±ν
μ
λ ₯μ λ°μ λμμΈ/λ°λͺ
μμ΄λμ΄λ₯Ό μμ±νλ€.
|
| 2041 |
-
μ€νΈλ¦¬λ°
|
| 2042 |
-
|
| 2043 |
"""
|
|
|
|
| 2044 |
if not any(m["role"] == "user" and m["content"] == prompt for m in st.session_state.messages):
|
| 2045 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 2046 |
with st.chat_message("user"):
|
| 2047 |
st.markdown(prompt)
|
| 2048 |
|
| 2049 |
-
# λμΌ ν둬ννΈ-μλ΅ μ€λ³΅ λ°©μ§
|
| 2050 |
for i in range(len(st.session_state.messages) - 1):
|
| 2051 |
if (st.session_state.messages[i]["role"] == "user"
|
| 2052 |
and st.session_state.messages[i]["content"] == prompt
|
| 2053 |
and st.session_state.messages[i + 1]["role"] == "assistant"):
|
| 2054 |
return
|
| 2055 |
|
|
|
|
| 2056 |
with st.chat_message("assistant"):
|
| 2057 |
status = st.status("Preparing to generate invention ideasβ¦")
|
| 2058 |
stream_placeholder = st.empty()
|
|
@@ -2064,8 +2084,6 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2064 |
|
| 2065 |
selected_cat = st.session_state.get("category_focus", None)
|
| 2066 |
selected_frameworks = st.session_state.get("selected_frameworks", [])
|
| 2067 |
-
|
| 2068 |
-
# λͺ©μ μ΄ "λμμΈ/λ°λͺ
"μ΄λ―λ‘, system prompt λ³κ²½
|
| 2069 |
sys_prompt = get_idea_system_prompt(
|
| 2070 |
selected_category=selected_cat,
|
| 2071 |
selected_frameworks=selected_frameworks
|
|
@@ -2080,9 +2098,9 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2080 |
use_kaggle = st.session_state.kaggle_enabled
|
| 2081 |
has_uploaded = bool(uploaded_files)
|
| 2082 |
|
| 2083 |
-
search_content = kaggle_content = file_content = mil_content = None
|
| 2084 |
|
| 2085 |
-
# β
|
| 2086 |
if use_web_search:
|
| 2087 |
status.update(label="Searching the webβ¦")
|
| 2088 |
with st.spinner("Searchingβ¦"):
|
|
@@ -2117,7 +2135,7 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2117 |
with st.spinner("Processing filesβ¦"):
|
| 2118 |
file_content = process_uploaded_files(uploaded_files)
|
| 2119 |
|
| 2120 |
-
# β£ κ΅°μ¬ μ μ λ°μ΄ν°
|
| 2121 |
if is_military_query(prompt):
|
| 2122 |
status.update(label="Searching military tactics datasetβ¦")
|
| 2123 |
with st.spinner("Loading military insightsβ¦"):
|
|
@@ -2132,13 +2150,13 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2132 |
f"**Defense Reasoning:** {row['defense_reasoning']}\n\n---\n"
|
| 2133 |
)
|
| 2134 |
|
| 2135 |
-
#
|
| 2136 |
user_content = prompt
|
| 2137 |
-
for
|
| 2138 |
-
if
|
| 2139 |
-
user_content += "\n\n" +
|
| 2140 |
|
| 2141 |
-
# λ΄λΆ λΆμ
|
| 2142 |
status.update(label="λΆμ μ€β¦")
|
| 2143 |
decision_purpose = identify_decision_purpose(prompt)
|
| 2144 |
relevance_scores = compute_relevance_scores(prompt, PHYS_CATEGORIES)
|
|
@@ -2173,7 +2191,7 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2173 |
for c, s in decision_purpose['constraints']:
|
| 2174 |
purpose_info += f"- **{c}** (κ΄λ ¨μ±: {s})\n"
|
| 2175 |
|
| 2176 |
-
#
|
| 2177 |
framework_contents = []
|
| 2178 |
for fw in selected_frameworks:
|
| 2179 |
if fw == "swot":
|
|
@@ -2188,7 +2206,6 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2188 |
framework_contents.append(
|
| 2189 |
format_business_framework_analysis("bcg", analyze_with_bcg(prompt))
|
| 2190 |
)
|
| 2191 |
-
# sunzi λ± νμ μ μΆκ°
|
| 2192 |
|
| 2193 |
if framework_contents:
|
| 2194 |
user_content += "\n\n## (Optional) κΈ°ν νλ μμν¬ λΆμ\n\n" + "\n\n".join(framework_contents)
|
|
@@ -2203,15 +2220,11 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2203 |
{"role": "user", "content": user_content},
|
| 2204 |
]
|
| 2205 |
|
| 2206 |
-
#
|
| 2207 |
-
# β¬οΈ 1. μμ ν μ€νΈλ¦¬λ° νΈμΆ (backoff μ¬μλ)
|
| 2208 |
@backoff.on_exception(
|
| 2209 |
-
|
| 2210 |
-
(RemoteProtocolError, APITimeoutError, APIError),
|
| 2211 |
-
max_tries=3,
|
| 2212 |
-
jitter=None
|
| 2213 |
)
|
| 2214 |
-
def
|
| 2215 |
return client.chat.completions.create(
|
| 2216 |
model="gpt-4.1-mini",
|
| 2217 |
messages=api_messages,
|
|
@@ -2222,13 +2235,13 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2222 |
)
|
| 2223 |
|
| 2224 |
try:
|
| 2225 |
-
stream =
|
| 2226 |
for chunk in stream:
|
| 2227 |
if chunk.choices and chunk.choices[0].delta.content:
|
| 2228 |
full_response += chunk.choices[0].delta.content
|
| 2229 |
stream_placeholder.markdown(full_response + "β")
|
| 2230 |
except (RemoteProtocolError, APITimeoutError, APIError) as stream_err:
|
| 2231 |
-
logging.warning(f"
|
| 2232 |
resp = client.chat.completions.create(
|
| 2233 |
model="gpt-4.1-mini",
|
| 2234 |
messages=api_messages,
|
|
@@ -2239,11 +2252,10 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2239 |
)
|
| 2240 |
full_response = resp.choices[0].message.content
|
| 2241 |
stream_placeholder.markdown(full_response)
|
| 2242 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 2243 |
|
| 2244 |
status.update(label="Invention ideas created!", state="complete")
|
| 2245 |
|
| 2246 |
-
#
|
| 2247 |
img_data = img_caption = None
|
| 2248 |
if st.session_state.generate_image and full_response:
|
| 2249 |
match = re.search(r"###\s*μ΄λ―Έμ§\s*ν둬ννΈ\s*\n+([^\n]+)", full_response, re.I)
|
|
@@ -2256,7 +2268,7 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2256 |
if img_data:
|
| 2257 |
st.image(img_data, caption=f"Visualized Concept β {img_caption}")
|
| 2258 |
|
| 2259 |
-
# μΈμ
λ©μμ§
|
| 2260 |
answer_msg = {"role": "assistant", "content": full_response}
|
| 2261 |
if img_data:
|
| 2262 |
answer_msg["image"] = img_data
|
|
@@ -2264,7 +2276,7 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2264 |
st.session_state["_skip_dup_idx"] = len(st.session_state.messages)
|
| 2265 |
st.session_state.messages.append(answer_msg)
|
| 2266 |
|
| 2267 |
-
# λ€μ΄λ‘λ
|
| 2268 |
st.subheader("Download This Output")
|
| 2269 |
col_md, col_html = st.columns(2)
|
| 2270 |
col_md.download_button(
|
|
@@ -2292,6 +2304,7 @@ def process_input(prompt: str, uploaded_files):
|
|
| 2292 |
{"role": "assistant", "content": f"β οΈ μ€λ₯: {e}"}
|
| 2293 |
)
|
| 2294 |
|
|
|
|
| 2295 |
def main():
    """Entry point: delegate to ``idea_generator_app()``."""
    idea_generator_app()
|
| 2297 |
|
|
|
|
| 18 |
import PyPDF2 # For handling PDF files
|
| 19 |
from collections import Counter
|
| 20 |
|
| 21 |
+
from openai import OpenAI, APIError, APITimeoutError
|
| 22 |
from gradio_client import Client
|
| 23 |
from kaggle.api.kaggle_api_extended import KaggleApi
|
| 24 |
import tempfile
|
|
|
|
| 30 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 31 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 32 |
|
| 33 |
+
# ─── Network-stability libraries ──────────────────────────────────────
|
| 34 |
import httpx
|
| 35 |
from httpx import RemoteProtocolError
|
| 36 |
+
|
| 37 |
+
# ▸ Fall back to an inline replacement when the backoff module is missing
|
| 38 |
+
try:
    import backoff
except ImportError:  # typically hit on a first run, before dependencies are installed
    # Local imports keep this fallback branch self-contained.
    import functools
    import time

    logging.warning("`backoff` 모듈이 없어 간단 대체 데코레이터를 사용합니다.")

    def _simple_backoff_on_exception(exc_tuple, max_tries=3, base=2):
        """Return a decorator that retries the wrapped call on given exceptions.

        Minimal stand-in used only when the ``backoff`` package cannot be
        imported.

        Args:
            exc_tuple: Exception class (or tuple of classes) that triggers a
                retry; anything else propagates immediately.
            max_tries: Total number of attempts, including the first call.
            base: Base of the exponential delay; the wait before retry
                ``n`` is ``base ** n`` seconds.

        Returns:
            A decorator. The decorated function returns whatever the wrapped
            function returns, and re-raises the last caught exception once
            ``max_tries`` attempts have failed.

        NOTE(review): the real ``backoff.on_exception`` takes a wait
        generator (e.g. ``backoff.expo``) as its first positional argument;
        this fallback does not - confirm call sites work with both.
        """
        def decorator(fn):
            @functools.wraps(fn)  # keep the wrapped function's name/docstring
            def wrapper(*args, **kwargs):
                for attempt in range(1, max_tries + 1):
                    try:
                        return fn(*args, **kwargs)
                    except exc_tuple as e:
                        if attempt == max_tries:
                            # Out of attempts: surface the original error.
                            raise
                        sleep = base ** attempt
                        logging.info(
                            "Retry %d/%d after %ss (%s)",
                            attempt, max_tries, sleep, e,
                        )
                        time.sleep(sleep)
            return wrapper
        return decorator

    class _DummyBackoff:
        """Namespace exposing ``on_exception`` like the ``backoff`` module."""

        # staticmethod is required: ``backoff`` below is an *instance*, so a
        # plain function attribute would be bound and receive the instance
        # as ``exc_tuple``, making every ``backoff.on_exception(...)`` call
        # fail with a TypeError.
        on_exception = staticmethod(_simple_backoff_on_exception)

    backoff = _DummyBackoff()  # same access pattern as the real module
|
| 59 |
|
| 60 |
# βββββββββββββββββββββββββββββββ Environment Variables / Constants βββββββββββββββββββββββββ
|
| 61 |
|
|
|
|
| 71 |
os.environ["KAGGLE_USERNAME"] = KAGGLE_USERNAME
|
| 72 |
os.environ["KAGGLE_KEY"] = KAGGLE_KEY
|
| 73 |
|
|
|
|
|
|
|
|
|
|
| 74 |
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
|
| 75 |
IMAGE_API_URL = "http://211.233.58.201:7896" # μμ μ΄λ―Έμ§ μμ±μ© API
|
| 76 |
+
MAX_TOKENS = 4096 # μμ ν ν ν° νλ
|
| 77 |
|
| 78 |
# βββββββββββββββββββββββββββββββ Logging βββββββββββββββββββββββββββββββ
|
| 79 |
logging.basicConfig(
|
|
|
|
| 2057 |
def process_input(prompt: str, uploaded_files):
|
| 2058 |
"""
|
| 2059 |
λ©μΈ μ±ν
μ
λ ₯μ λ°μ λμμΈ/λ°λͺ
μμ΄λμ΄λ₯Ό μμ±νλ€.
|
| 2060 |
+
μ€νΈλ¦¬λ° μ€ν¨(RemoteProtocolError λ±) μ backoff μ¬μλ ν
|
| 2061 |
+
μ΅μ’
μ μΌλ‘ non-stream νΈμΆλ‘ ν΄λ°±.
|
| 2062 |
"""
|
| 2063 |
+
# βββ λν κΈ°λ‘ μ€λ³΅ λ°©μ§ ββββββββββββββββββββββββββββββ
|
| 2064 |
if not any(m["role"] == "user" and m["content"] == prompt for m in st.session_state.messages):
|
| 2065 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 2066 |
with st.chat_message("user"):
|
| 2067 |
st.markdown(prompt)
|
| 2068 |
|
|
|
|
| 2069 |
for i in range(len(st.session_state.messages) - 1):
|
| 2070 |
if (st.session_state.messages[i]["role"] == "user"
|
| 2071 |
and st.session_state.messages[i]["content"] == prompt
|
| 2072 |
and st.session_state.messages[i + 1]["role"] == "assistant"):
|
| 2073 |
return
|
| 2074 |
|
| 2075 |
+
# βββ κ²°κ³Ό μμ± βββββββββββββββββββββββββββββββββββββββ
|
| 2076 |
with st.chat_message("assistant"):
|
| 2077 |
status = st.status("Preparing to generate invention ideasβ¦")
|
| 2078 |
stream_placeholder = st.empty()
|
|
|
|
| 2084 |
|
| 2085 |
selected_cat = st.session_state.get("category_focus", None)
|
| 2086 |
selected_frameworks = st.session_state.get("selected_frameworks", [])
|
|
|
|
|
|
|
| 2087 |
sys_prompt = get_idea_system_prompt(
|
| 2088 |
selected_category=selected_cat,
|
| 2089 |
selected_frameworks=selected_frameworks
|
|
|
|
| 2098 |
use_kaggle = st.session_state.kaggle_enabled
|
| 2099 |
has_uploaded = bool(uploaded_files)
|
| 2100 |
|
| 2101 |
+
search_content = kaggle_content = file_content = mil_content = None
|
| 2102 |
|
| 2103 |
+
# β μΉ κ²μ
|
| 2104 |
if use_web_search:
|
| 2105 |
status.update(label="Searching the webβ¦")
|
| 2106 |
with st.spinner("Searchingβ¦"):
|
|
|
|
| 2135 |
with st.spinner("Processing filesβ¦"):
|
| 2136 |
file_content = process_uploaded_files(uploaded_files)
|
| 2137 |
|
| 2138 |
+
# β£ κ΅°μ¬ μ μ λ°μ΄ν°
|
| 2139 |
if is_military_query(prompt):
|
| 2140 |
status.update(label="Searching military tactics datasetβ¦")
|
| 2141 |
with st.spinner("Loading military insightsβ¦"):
|
|
|
|
| 2150 |
f"**Defense Reasoning:** {row['defense_reasoning']}\n\n---\n"
|
| 2151 |
)
|
| 2152 |
|
| 2153 |
+
# βββ μ μ μ½ν
μΈ κ΅¬μ± ββββββββββββββββββββββββββ
|
| 2154 |
user_content = prompt
|
| 2155 |
+
for extra in (search_content, kaggle_content, file_content, mil_content):
|
| 2156 |
+
if extra:
|
| 2157 |
+
user_content += "\n\n" + extra
|
| 2158 |
|
| 2159 |
+
# βββ λ΄λΆ λΆμ βββββββββββββββββββββββββββββββ
|
| 2160 |
status.update(label="λΆμ μ€β¦")
|
| 2161 |
decision_purpose = identify_decision_purpose(prompt)
|
| 2162 |
relevance_scores = compute_relevance_scores(prompt, PHYS_CATEGORIES)
|
|
|
|
| 2191 |
for c, s in decision_purpose['constraints']:
|
| 2192 |
purpose_info += f"- **{c}** (κ΄λ ¨μ±: {s})\n"
|
| 2193 |
|
| 2194 |
+
# βββ νλ μμν¬ λΆμ (μ΅μ
) ββββββββββββββββββββ
|
| 2195 |
framework_contents = []
|
| 2196 |
for fw in selected_frameworks:
|
| 2197 |
if fw == "swot":
|
|
|
|
| 2206 |
framework_contents.append(
|
| 2207 |
format_business_framework_analysis("bcg", analyze_with_bcg(prompt))
|
| 2208 |
)
|
|
|
|
| 2209 |
|
| 2210 |
if framework_contents:
|
| 2211 |
user_content += "\n\n## (Optional) κΈ°ν νλ μμν¬ λΆμ\n\n" + "\n\n".join(framework_contents)
|
|
|
|
| 2220 |
{"role": "user", "content": user_content},
|
| 2221 |
]
|
| 2222 |
|
| 2223 |
+
# βββ OpenAI Chat νΈμΆ (backoff μ¬μλ) βββββββββββββββββ
|
|
|
|
| 2224 |
@backoff.on_exception(
|
| 2225 |
+
(RemoteProtocolError, APITimeoutError, APIError), max_tries=3
|
|
|
|
|
|
|
|
|
|
| 2226 |
)
|
| 2227 |
+
def safe_stream():
|
| 2228 |
return client.chat.completions.create(
|
| 2229 |
model="gpt-4.1-mini",
|
| 2230 |
messages=api_messages,
|
|
|
|
| 2235 |
)
|
| 2236 |
|
| 2237 |
try:
|
| 2238 |
+
stream = safe_stream()
|
| 2239 |
for chunk in stream:
|
| 2240 |
if chunk.choices and chunk.choices[0].delta.content:
|
| 2241 |
full_response += chunk.choices[0].delta.content
|
| 2242 |
stream_placeholder.markdown(full_response + "β")
|
| 2243 |
except (RemoteProtocolError, APITimeoutError, APIError) as stream_err:
|
| 2244 |
+
logging.warning(f"μ€νΈλ¦¬λ° μ€ν¨, non-stream ν΄λ°±: {stream_err}")
|
| 2245 |
resp = client.chat.completions.create(
|
| 2246 |
model="gpt-4.1-mini",
|
| 2247 |
messages=api_messages,
|
|
|
|
| 2252 |
)
|
| 2253 |
full_response = resp.choices[0].message.content
|
| 2254 |
stream_placeholder.markdown(full_response)
|
|
|
|
| 2255 |
|
| 2256 |
status.update(label="Invention ideas created!", state="complete")
|
| 2257 |
|
| 2258 |
+
# βββ μ΄λ―Έμ§ μμ± ββββββββββββββββββββββββββββββββ
|
| 2259 |
img_data = img_caption = None
|
| 2260 |
if st.session_state.generate_image and full_response:
|
| 2261 |
match = re.search(r"###\s*μ΄λ―Έμ§\s*ν둬ννΈ\s*\n+([^\n]+)", full_response, re.I)
|
|
|
|
| 2268 |
if img_data:
|
| 2269 |
st.image(img_data, caption=f"Visualized Concept β {img_caption}")
|
| 2270 |
|
| 2271 |
+
# βββ μΈμ
λ©μμ§ μ μ₯ βββββββββββββββββββββββββββββ
|
| 2272 |
answer_msg = {"role": "assistant", "content": full_response}
|
| 2273 |
if img_data:
|
| 2274 |
answer_msg["image"] = img_data
|
|
|
|
| 2276 |
st.session_state["_skip_dup_idx"] = len(st.session_state.messages)
|
| 2277 |
st.session_state.messages.append(answer_msg)
|
| 2278 |
|
| 2279 |
+
# βββ λ€μ΄λ‘λ μ΅μ
ββββββββββββββββββββββββββββββ
|
| 2280 |
st.subheader("Download This Output")
|
| 2281 |
col_md, col_html = st.columns(2)
|
| 2282 |
col_md.download_button(
|
|
|
|
| 2304 |
{"role": "assistant", "content": f"β οΈ μ€λ₯: {e}"}
|
| 2305 |
)
|
| 2306 |
|
| 2307 |
+
|
| 2308 |
def main():
    """Entry point: delegate to ``idea_generator_app()``."""
    idea_generator_app()
|
| 2310 |
|