Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,21 +16,32 @@ def get_client(model_name):
|
|
| 16 |
return InferenceClient(LLM_MODELS[model_name], token=os.getenv("HF_TOKEN"))
|
| 17 |
|
| 18 |
def analyze_file_content(content, file_type):
|
| 19 |
-
"""νμΌ λ΄μ©μ λΆμνμ¬
|
| 20 |
if file_type == 'parquet':
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
#
|
| 24 |
lines = content.split('\n')
|
| 25 |
total_lines = len(lines)
|
| 26 |
non_empty_lines = len([line for line in lines if line.strip()])
|
| 27 |
|
| 28 |
-
|
|
|
|
| 29 |
functions = len([line for line in lines if 'def ' in line])
|
| 30 |
classes = len([line for line in lines if 'class ' in line])
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def read_uploaded_file(file):
|
| 36 |
if file is None:
|
|
@@ -57,38 +68,49 @@ def format_history(history):
|
|
| 57 |
return formatted_history
|
| 58 |
|
| 59 |
def chat(message, history, uploaded_file, model_name, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
|
| 60 |
-
system_prefix = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
if uploaded_file:
|
| 63 |
content, file_type = read_uploaded_file(uploaded_file)
|
| 64 |
if file_type == "error":
|
| 65 |
-
|
|
|
|
| 66 |
|
| 67 |
-
# νμΌ λ΄μ© λΆμ λ° μμ½
|
| 68 |
file_summary = analyze_file_content(content, file_type)
|
| 69 |
|
| 70 |
if file_type == 'parquet':
|
| 71 |
system_message += f"\n\nνμΌ λ΄μ©:\n```markdown\n{content}\n```"
|
| 72 |
else:
|
| 73 |
-
system_message += f"\n\nνμΌ λ΄μ©:\n
|
| 74 |
|
| 75 |
if message == "νμΌ λΆμμ μμν©λλ€.":
|
| 76 |
-
message = f"""[
|
| 77 |
|
| 78 |
-
λ€μ
|
| 79 |
-
1. νμΌμ
|
| 80 |
-
2. μ£Όμ
|
| 81 |
-
3.
|
| 82 |
-
4.
|
| 83 |
-
5.
|
|
|
|
| 84 |
|
| 85 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
| 86 |
messages.extend(format_history(history))
|
| 87 |
messages.append({"role": "user", "content": message})
|
| 88 |
|
| 89 |
-
response = ""
|
| 90 |
try:
|
| 91 |
client = get_client(model_name)
|
|
|
|
|
|
|
| 92 |
for msg in client.chat_completion(
|
| 93 |
messages,
|
| 94 |
max_tokens=max_tokens,
|
|
@@ -98,14 +120,12 @@ def chat(message, history, uploaded_file, model_name, system_message="", max_tok
|
|
| 98 |
):
|
| 99 |
token = msg.choices[0].delta.get('content', None)
|
| 100 |
if token:
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
return "", history
|
| 105 |
except Exception as e:
|
| 106 |
error_msg = f"μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
| 107 |
-
|
| 108 |
-
return "", history
|
| 109 |
|
| 110 |
css = """
|
| 111 |
footer {visibility: hidden}
|
|
@@ -132,7 +152,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
|
|
| 132 |
)
|
| 133 |
|
| 134 |
file_upload = gr.File(
|
| 135 |
-
label="νμΌ μ
λ‘λ",
|
| 136 |
file_types=["text", ".parquet"],
|
| 137 |
type="filepath"
|
| 138 |
)
|
|
@@ -147,25 +167,31 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
|
|
| 147 |
msg.submit(
|
| 148 |
chat,
|
| 149 |
inputs=[msg, chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
|
| 150 |
-
outputs=[msg, chatbot]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
)
|
| 152 |
|
| 153 |
# 파일 업로드 시 자동 분석
|
| 154 |
file_upload.change(
|
| 155 |
chat,
|
| 156 |
inputs=[gr.Textbox(value="νμΌ λΆμμ μμν©λλ€."), chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
|
| 157 |
-
outputs=[msg, chatbot]
|
|
|
|
| 158 |
)
|
| 159 |
|
| 160 |
# 예제 추가
|
| 161 |
gr.Examples(
|
| 162 |
examples=[
|
| 163 |
-
["
|
| 164 |
-
["
|
| 165 |
-
["
|
| 166 |
-
["
|
| 167 |
-
["
|
| 168 |
-
["
|
| 169 |
],
|
| 170 |
inputs=msg,
|
| 171 |
)
|
|
|
|
| 16 |
return InferenceClient(LLM_MODELS[model_name], token=os.getenv("HF_TOKEN"))
|
| 17 |
|
| 18 |
def analyze_file_content(content, file_type):
    """Return a one-line structural summary of an uploaded file's content.

    Args:
        content: File content as text. When ``file_type`` is ``'parquet'``
            it is treated as a pipe-delimited Markdown table (header row,
            separator row, then data rows) -- presumably what the file
            reader elsewhere in this module produces; verify against caller.
        file_type: ``'parquet'`` selects the table summary; any other value
            is summarised as source code or as plain text.

    Returns:
        A user-facing (Korean) summary string: column/row counts for a
        table, line/function/class/import counts for code, or
        line/paragraph/word counts for plain text. For ``'parquet'`` input
        that does not look like a table, a fixed failure message.
    """
    if file_type == 'parquet':
        failure = "데이터셋 구조 분석 실패"
        if not isinstance(content, str):
            return failure
        # Count rows from non-blank lines so that a missing or extra trailing
        # newline does not shift the result.
        table_lines = [line for line in content.splitlines() if line.strip()]
        if not table_lines:
            return failure
        # "| a | b |" has one more pipe than it has columns.
        columns = table_lines[0].count('|') - 1
        if columns < 1:
            return failure
        # Header and separator rows are not data.
        rows = max(len(table_lines) - 2, 0)
        return f"데이터셋 구조: {columns}개 컬럼, {rows}개 데이터 샘플"

    # Text / code files.
    lines = content.splitlines()
    total_lines = len(lines)

    # Heuristic: any of these keywords anywhere marks the file as code.
    lowered = content.lower()
    if any(keyword in lowered for keyword in ('def ', 'class ', 'import ', 'function')):
        # Match at the start of the statement so that mentions inside string
        # literals, comments or longer identifiers are not counted.
        statements = [line.lstrip() for line in lines]
        functions = sum(s.startswith(('def ', 'async def ')) for s in statements)
        classes = sum(s.startswith('class ') for s in statements)
        imports = sum(s.startswith(('import ', 'from ')) for s in statements)
        return f"코드 구조 분석: 총 {total_lines}줄 (함수 {functions}개, 클래스 {classes}개, 임포트 {imports}개)"

    # Plain text: a paragraph is a run of non-blank lines, so consecutive
    # blank lines do not inflate the count.
    paragraphs = 0
    in_paragraph = False
    for line in lines:
        has_text = bool(line.strip())
        if has_text and not in_paragraph:
            paragraphs += 1
        in_paragraph = has_text
    words = len(content.split())
    return f"문서 구조 분석: 총 {total_lines}줄, {paragraphs}개 문단, 약 {words}개 단어"
|
| 45 |
|
| 46 |
def read_uploaded_file(file):
|
| 47 |
if file is None:
|
|
|
|
| 68 |
return formatted_history
|
| 69 |
|
| 70 |
def chat(message, history, uploaded_file, model_name, system_message="", max_tokens=4000, temperature=0.7, top_p=0.9):
|
| 71 |
+
system_prefix = """λλ νμΌ λΆμ μ λ¬Έκ°μ
λλ€. μ
λ‘λλ νμΌμ λ΄μ©μ κΉμ΄ μκ² λΆμνμ¬ λ€μκ³Ό κ°μ κ΄μ μμ μ€λͺ
ν΄μΌ ν©λλ€:
|
| 72 |
+
|
| 73 |
+
1. νμΌμ μ λ°μ μΈ κ΅¬μ‘°μ ꡬμ±
|
| 74 |
+
2. μ£Όμ λ΄μ©κ³Ό ν¨ν΄ λΆμ
|
| 75 |
+
3. λ°μ΄ν°μ νΉμ§κ³Ό μλ―Έ
|
| 76 |
+
4. μ μ¬μ νμ© λ°©μ
|
| 77 |
+
5. μ£Όμν΄μΌ ν μ μ΄λ κ°μ κ°λ₯ν λΆλΆ
|
| 78 |
+
|
| 79 |
+
μ λ¬Έκ°μ κ΄μ μμ μμΈνκ³ κ΅¬μ‘°μ μΈ λΆμμ μ 곡νλ, μ΄ν΄νκΈ° μ½κ² μ€λͺ
νμΈμ. λΆμ κ²°κ³Όλ Markdown νμμΌλ‘ μμ±νκ³ , κ°λ₯ν ν ꡬ체μ μΈ μμλ₯Ό ν¬ν¨νμΈμ."""
|
| 80 |
|
| 81 |
if uploaded_file:
|
| 82 |
content, file_type = read_uploaded_file(uploaded_file)
|
| 83 |
if file_type == "error":
|
| 84 |
+
yield "", history + [[message, content]]
|
| 85 |
+
return
|
| 86 |
|
| 87 |
+
# νμΌ λ΄μ© λΆμ λ° κ΅¬μ‘°μ μμ½
|
| 88 |
file_summary = analyze_file_content(content, file_type)
|
| 89 |
|
| 90 |
if file_type == 'parquet':
|
| 91 |
system_message += f"\n\nνμΌ λ΄μ©:\n```markdown\n{content}\n```"
|
| 92 |
else:
|
| 93 |
+
system_message += f"\n\nνμΌ λ΄μ©:\n```\n{content}\n```"
|
| 94 |
|
| 95 |
if message == "νμΌ λΆμμ μμν©λλ€.":
|
| 96 |
+
message = f"""[ꡬ쑰 λΆμ] {file_summary}
|
| 97 |
|
| 98 |
+
λ€μ κ΄μ μμ μμΈ λΆμμ μ 곡ν΄μ£ΌμΈμ:
|
| 99 |
+
1. νμΌμ μ λ°μ μΈ κ΅¬μ‘°μ νμ
|
| 100 |
+
2. μ£Όμ λ΄μ© λ° κ΅¬μ±μμ λΆμ
|
| 101 |
+
3. λ°μ΄ν°/λ΄μ©μ νΉμ§κ³Ό ν¨ν΄
|
| 102 |
+
4. νμ§ λ° μμ±λ νκ°
|
| 103 |
+
5. κ°μ κ°λ₯ν λΆλΆ μ μ
|
| 104 |
+
6. μ€μ νμ© λ°©μ λ° μΆμ²μ¬ν"""
|
| 105 |
|
| 106 |
messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
|
| 107 |
messages.extend(format_history(history))
|
| 108 |
messages.append({"role": "user", "content": message})
|
| 109 |
|
|
|
|
| 110 |
try:
|
| 111 |
client = get_client(model_name)
|
| 112 |
+
partial_message = ""
|
| 113 |
+
|
| 114 |
for msg in client.chat_completion(
|
| 115 |
messages,
|
| 116 |
max_tokens=max_tokens,
|
|
|
|
| 120 |
):
|
| 121 |
token = msg.choices[0].delta.get('content', None)
|
| 122 |
if token:
|
| 123 |
+
partial_message += token
|
| 124 |
+
yield "", history + [[message, partial_message]]
|
| 125 |
+
|
|
|
|
| 126 |
except Exception as e:
|
| 127 |
error_msg = f"μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
| 128 |
+
yield "", history + [[message, error_msg]]
|
|
|
|
| 129 |
|
| 130 |
css = """
|
| 131 |
footer {visibility: hidden}
|
|
|
|
| 152 |
)
|
| 153 |
|
| 154 |
file_upload = gr.File(
|
| 155 |
+
label="νμΌ μ
λ‘λ (ν
μ€νΈ, μ½λ, λ°μ΄ν° νμΌ)",
|
| 156 |
file_types=["text", ".parquet"],
|
| 157 |
type="filepath"
|
| 158 |
)
|
|
|
|
| 167 |
msg.submit(
|
| 168 |
chat,
|
| 169 |
inputs=[msg, chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
|
| 170 |
+
outputs=[msg, chatbot],
|
| 171 |
+
queue=True
|
| 172 |
+
).then(
|
| 173 |
+
lambda: gr.update(interactive=True),
|
| 174 |
+
None,
|
| 175 |
+
[msg]
|
| 176 |
)
|
| 177 |
|
| 178 |
# 파일 업로드 시 자동 분석
|
| 179 |
file_upload.change(
|
| 180 |
chat,
|
| 181 |
inputs=[gr.Textbox(value="νμΌ λΆμμ μμν©λλ€."), chatbot, file_upload, model_name, system_message, max_tokens, temperature, top_p],
|
| 182 |
+
outputs=[msg, chatbot],
|
| 183 |
+
queue=True
|
| 184 |
)
|
| 185 |
|
| 186 |
# 예제 추가
|
| 187 |
gr.Examples(
|
| 188 |
examples=[
|
| 189 |
+
["νμΌμ μ λ°μ μΈ κ΅¬μ‘°μ νΉμ§μ μμΈν μ€λͺ
ν΄μ£ΌμΈμ."],
|
| 190 |
+
["μ΄ νμΌμ μ£Όμ ν¨ν΄κ³Ό νΉμ§μ λΆμν΄μ£ΌμΈμ."],
|
| 191 |
+
["νμΌμ νμ§κ³Ό κ°μ κ°λ₯ν λΆλΆμ νκ°ν΄μ£ΌμΈμ."],
|
| 192 |
+
["μ΄ νμΌμ μ€μ λ‘ μ΄λ»κ² νμ©ν μ μμκΉμ?"],
|
| 193 |
+
["νμΌμ μ£Όμ λ΄μ©μ μμ½νκ³ ν΅μ¬ μΈμ¬μ΄νΈλ₯Ό λμΆν΄μ£ΌμΈμ."],
|
| 194 |
+
["μ΄μ λΆμμ μ΄μ΄μ λ μμΈν μ€λͺ
ν΄μ£ΌμΈμ."],
|
| 195 |
],
|
| 196 |
inputs=msg,
|
| 197 |
)
|