pradeep4321 committed on
Commit
b52f1c2
·
verified ·
1 Parent(s): 967d7c5

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +42 -37
src/streamlit_app.py CHANGED
@@ -1,20 +1,20 @@
1
  import streamlit as st
2
  import torch
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
  # ==============================
6
  # PAGE CONFIG
7
  # ==============================
8
  st.set_page_config(page_title="💻 AI Code Generator", layout="wide")
9
-
10
- st.title("💻 AI Code Generator (Stable Version)")
11
 
12
  # ==============================
13
- # LOAD MODEL (SAFE)
14
  # ==============================
15
  @st.cache_resource
16
  def load_model():
17
- model_name = "deepseek-ai/deepseek-coder-1.3b-instruct" # ✅ HF Free Safe
18
 
19
  tokenizer = AutoTokenizer.from_pretrained(model_name)
20
 
@@ -26,33 +26,42 @@ def load_model():
26
  model.eval()
27
  return tokenizer, model
28
 
29
- # Load with spinner (IMPORTANT)
30
- with st.spinner("🔄 Loading AI model... Please wait"):
31
  tokenizer, model = load_model()
32
 
33
  st.success("✅ Model Loaded Successfully")
34
 
35
  # ==============================
36
- # CLEAN OUTPUT
37
  # ==============================
38
- def clean_code(code):
39
- code = code.strip()
 
 
40
 
41
- # Remove unwanted text
42
- remove_words = [
43
- "Explanation:",
44
- "Here is the code:",
45
- "Output:",
46
- "Answer:"
47
- ]
48
 
49
- for word in remove_words:
50
- code = code.replace(word, "")
51
 
52
  # Remove markdown
53
- code = code.replace("```python", "").replace("```", "")
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- return code.strip()
56
 
57
  # ==============================
58
  # GENERATE CODE
@@ -60,19 +69,19 @@ def clean_code(code):
60
  def generate_code(prompt, language):
61
 
62
  full_prompt = f"""
 
63
  You are an expert {language} programmer.
64
 
65
- Write clean, correct, and complete code.
66
-
67
- Rules:
68
  - Only return code
69
- - No explanations
70
- - Complete solution
 
71
 
72
- Task:
73
  {prompt}
74
 
75
- Code:
76
  """
77
 
78
  inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True)
@@ -81,20 +90,16 @@ Code:
81
  with torch.no_grad():
82
  outputs = model.generate(
83
  **inputs,
84
- max_new_tokens=200,
85
  do_sample=True,
86
- temperature=0.3,
87
- top_p=0.9,
88
- repetition_penalty=1.1,
89
  pad_token_id=tokenizer.eos_token_id
90
  )
91
 
92
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
93
-
94
- if "Code:" in result:
95
- result = result.split("Code:")[-1]
96
-
97
- return clean_code(result)
98
 
99
  except Exception as e:
100
  return f"# ERROR: {str(e)}"
@@ -126,7 +131,7 @@ if st.button("Generate Code"):
126
  if not user_prompt.strip():
127
  st.warning("⚠️ Please enter a task")
128
  else:
129
- with st.spinner("⚡ Generating code..."):
130
  code = generate_code(user_prompt, language)
131
 
132
  st.session_state.history.append((user_prompt, code))
 
1
  import streamlit as st
2
  import torch
3
+ import re
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
 
6
  # ==============================
7
  # PAGE CONFIG
8
  # ==============================
9
  st.set_page_config(page_title="💻 AI Code Generator", layout="wide")
10
+ st.title("💻 AI Code Generator (Advanced Clean Version)")
 
11
 
12
  # ==============================
13
+ # LOAD MODEL (HF SAFE)
14
  # ==============================
15
  @st.cache_resource
16
  def load_model():
17
+ model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
18
 
19
  tokenizer = AutoTokenizer.from_pretrained(model_name)
20
 
 
26
  model.eval()
27
  return tokenizer, model
28
 
29
+ # Load model with spinner
30
+ with st.spinner("🔄 Loading AI model..."):
31
  tokenizer, model = load_model()
32
 
33
  st.success("✅ Model Loaded Successfully")
34
 
35
  # ==============================
36
+ # CLEAN FUNCTION
37
  # ==============================
38
# Prompt-section headers the model sometimes echoes back ("### Instruction:",
# "### Response:", or a bare "Task:").  The pattern is anchored to the whole
# line so ordinary code that merely mentions these words (e.g.
# `response = requests.get(url)`) is kept.
_SECTION_HEADER_RE = re.compile(
    r"^\s*(?:#{2,}\s*(?:instruction|task|response)\b.*"
    r"|(?:instruction|task|response)\s*:\s*)$",
    re.IGNORECASE,
)

# A markdown fence that sits alone on its line, with an optional language tag
# ("```", "```python", "```java", "```c++", ...).
_FENCE_LINE_RE = re.compile(r"^[ \t]*```[\w+#.-]*[ \t]*\r?$", re.MULTILINE)


def extract_clean_code(text):
    """Extract the generated code from the raw decoded model output.

    Processing steps, in order:

    1. Keep only what follows the last ``"Code:"`` marker, if one is present.
    2. Delete every non-ASCII character.
    3. Delete everything from the first ``"Explanation:"`` to the end.
    4. Delete markdown code fences, including their language tag.
    5. Drop blank lines and echoed prompt-section header lines.
    6. Remove indentation common to all remaining lines.

    Relative indentation between lines is preserved so that
    indentation-sensitive languages such as Python remain valid.

    Args:
        text: Decoded model output; may still contain the prompt.

    Returns:
        The cleaned code as a single newline-joined string (possibly empty).
    """
    # Local import: the surrounding file is only known to import `re`.
    import textwrap

    if "Code:" in text:
        text = text.split("Code:")[-1]

    # Remove non-ASCII junk.
    # NOTE(review): this also strips legitimate non-ASCII characters from
    # string literals and comments in the generated code.
    text = re.sub(r"[^\x00-\x7F]+", "", text)

    # Remove unwanted trailing sections.
    text = re.sub(r"(Explanation:.*)", "", text, flags=re.DOTALL)

    # Remove markdown: whole fence lines first (any language tag), then any
    # fence markers left inline.
    text = _FENCE_LINE_RE.sub("", text)
    text = text.replace("```python", "").replace("```", "")

    kept = []
    for line in text.split("\n"):
        # Trim only the right side; leading whitespace is block structure.
        line = line.rstrip()
        if not line:
            continue
        if _SECTION_HEADER_RE.match(line):
            continue
        kept.append(line)

    # dedent (rather than strip) so a uniformly indented block is shifted
    # left as a whole instead of only its first line losing its indent.
    return textwrap.dedent("\n".join(kept))
65
 
66
  # ==============================
67
  # GENERATE CODE
 
69
  def generate_code(prompt, language):
70
 
71
  full_prompt = f"""
72
+ ### Instruction:
73
  You are an expert {language} programmer.
74
 
75
+ ### Rules:
 
 
76
  - Only return code
77
+ - No explanation
78
+ - Use simple and correct syntax
79
+ - Provide complete solution
80
 
81
+ ### Task:
82
  {prompt}
83
 
84
+ ### Code:
85
  """
86
 
87
  inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True)
 
90
  with torch.no_grad():
91
  outputs = model.generate(
92
  **inputs,
93
+ max_new_tokens=150,
94
  do_sample=True,
95
+ temperature=0.2,
96
+ top_p=0.85,
97
+ repetition_penalty=1.2,
98
  pad_token_id=tokenizer.eos_token_id
99
  )
100
 
101
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
102
+ return extract_clean_code(result)
 
 
 
 
103
 
104
  except Exception as e:
105
  return f"# ERROR: {str(e)}"
 
131
  if not user_prompt.strip():
132
  st.warning("⚠️ Please enter a task")
133
  else:
134
+ with st.spinner("⚡ Generating clean code..."):
135
  code = generate_code(user_prompt, language)
136
 
137
  st.session_state.history.append((user_prompt, code))