PraneshJs committed on
Commit
79ceb0f
·
verified ·
1 Parent(s): d7c6008

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -233
app.py CHANGED
@@ -1,248 +1,143 @@
1
- import os, tempfile, subprocess, shutil, fnmatch, json, requests, re, pickle, time, threading
 
2
  import gradio as gr
3
- from dotenv import load_dotenv
 
4
 
5
- # ---------------------------------
6
- # Setup
7
- # ---------------------------------
8
# Run python-dotenv's loader before the environment is read below.
load_dotenv()

# API key for OpenRouter; None when the variable is not set.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# Pickle file holding the persisted chat/repo state.
MEMORY_FILE = "repogpt_memory.pkl"

# Inactivity window, in seconds, after which persisted state is discarded.
MEMORY_TTL = 10 * 60

# Timestamp of the most recent user interaction (initialised at import).
_last_active_time = time.time()
13
-
14
- # ---------------------------------
15
- # Persistent Memory Management
16
- # ---------------------------------
17
def save_memory(data):
    """Persist the state dict *data* to ``MEMORY_FILE`` as a pickle.

    The dict is stamped in place with a ``"_timestamp"`` key (seconds since
    the epoch).  The pickle is first written to a temporary file in the same
    directory and then moved over ``MEMORY_FILE`` with ``os.replace``, so an
    interrupted write cannot leave a truncated pickle behind.

    Args:
        data: Mutable mapping holding the state to persist.

    Raises:
        OSError: If the temporary file cannot be created, written or renamed.
    """
    data["_timestamp"] = time.time()

    # The temp file must live on the same filesystem for os.replace to be atomic.
    target_dir = os.path.dirname(os.path.abspath(MEMORY_FILE))
    fd, tmp_path = tempfile.mkstemp(dir=target_dir, prefix=".repogpt_", suffix=".tmp")
    try:
        with os.fdopen(fd, "wb") as f:
            pickle.dump(data, f)
        os.replace(tmp_path, MEMORY_FILE)
    except BaseException:
        # Do not leave the half-written temp file behind.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
21
-
22
def load_memory():
    """Load the persisted state, or return a fresh empty state.

    A fresh state (``{"chat_history": [], "repo_context": ""}``) is returned
    when ``MEMORY_FILE`` is missing, unreadable, does not contain a dict, or
    carries a ``"_timestamp"`` older than ``MEMORY_TTL`` seconds (in which
    case the file is also deleted).

    Returns:
        dict: State containing at least the keys ``"chat_history"`` and
        ``"repo_context"``.
    """
    fresh = {"chat_history": [], "repo_context": ""}

    try:
        with open(MEMORY_FILE, "rb") as f:
            # SECURITY NOTE: pickle.load can execute arbitrary code embedded in
            # the file. Only safe while MEMORY_FILE is written by this app alone.
            data = pickle.load(f)
    except Exception:
        # Missing file, I/O error or corrupt pickle: start from scratch.
        return fresh

    if not isinstance(data, dict):
        return fresh

    stamp = data.get("_timestamp", 0)
    # Expire memory if inactive > TTL (a non-numeric stamp counts as expired).
    if not isinstance(stamp, (int, float)) or time.time() - stamp > MEMORY_TTL:
        print("🧹 Memory expired (10 min inactive). Resetting RepoGPT state.")
        try:
            os.remove(MEMORY_FILE)
        except OSError:
            pass  # best effort; the stale file is ignored on the next load anyway
        return fresh

    # Guarantee the keys the rest of the app indexes directly.
    data.setdefault("chat_history", [])
    data.setdefault("repo_context", "")
    return data
36
-
37
def clear_memory_periodically():
    """Background worker that deletes ``MEMORY_FILE`` after inactivity.

    Wakes every 60 seconds and compares the current time with
    ``_last_active_time``.  Once the gap exceeds ``MEMORY_TTL`` it removes the
    memory file (if there is one) and the loop ends.
    """
    while True:
        time.sleep(60)
        if time.time() - _last_active_time <= MEMORY_TTL:
            continue
        try:
            # Remove directly instead of exists()+remove(): the file may be
            # deleted by another code path between the two calls.
            os.remove(MEMORY_FILE)
        except OSError:
            pass  # nothing to clear, or not removable; cleanup is best-effort
        else:
            print("🧹 Auto-cleared RepoGPT memory after 10 minutes of inactivity.")
        # NOTE(review): the worker stops after the first expiry, so later idle
        # periods are not monitored — confirm this is intended.
        break


# Restore any persisted state and start the cleanup worker.
MEMORY = load_memory()
threading.Thread(target=clear_memory_periodically, daemon=True).start()
53
-
54
- # ---------------------------------
55
- # Repo Handling
56
- # ---------------------------------
57
def clone_repo(repo_url):
    """Shallow-clone *repo_url* into a new temporary directory.

    Args:
        repo_url: URL (or path) of the repository to clone.

    Returns:
        str: Path of the temporary directory holding the clone.  The caller
        is responsible for deleting it.

    Raises:
        RuntimeError: If ``git clone`` exits with a non-zero status.
        OSError: If the ``git`` executable cannot be started.
    """
    tmp_dir = tempfile.mkdtemp()
    try:
        result = subprocess.run(
            # "--" stops option parsing so a URL starting with "-" cannot be
            # interpreted as a git command-line option.
            ["git", "clone", "--depth", "1", "--", repo_url, tmp_dir],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode != 0:
            detail = result.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(
                f"git clone failed (exit {result.returncode}): {detail}"
            )
    except BaseException:
        # Do not leak the temp directory when the clone did not succeed.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
    return tmp_dir
65
-
66
def read_repo_files(repo_dir):
    """Collect the (truncated) text of source-like files under *repo_dir*.

    Files are selected by extension; at most the first 4000 characters of
    each file are kept.  Files that cannot be opened or read are skipped.

    Args:
        repo_dir: Root directory to walk.

    Returns:
        list[dict]: One ``{"filename": <base name>, "content": <text>}`` entry
        per matching file.
    """
    patterns = ["*.py", "*.js", "*.jsx", "*.ts", "*.tsx", "*.html", "*.json",
                "*.go", "*.java", "*.vue", "*.md", "*.css"]
    max_chars = 4000
    repo_content = []
    for root, _, files in os.walk(repo_dir):
        for pattern in patterns:
            for filename in fnmatch.filter(files, pattern):
                filepath = os.path.join(root, filename)
                try:
                    with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                        # Read only what is kept instead of loading the whole
                        # file and slicing afterwards.
                        content = f.read(max_chars)
                except OSError:
                    continue  # unreadable file: skip it, keep scanning
                repo_content.append({"filename": filename, "content": content})
    return repo_content
83
-
84
- # ---------------------------------
85
- # Tech Stack Auto-Detection
86
- # ---------------------------------
87
def detect_tech_stack(repo_files):
    """Guess the technologies used in a repository from keyword hits.

    This is a plain substring heuristic over the lower-cased file contents
    (and, for Docker, the file names); it can produce false positives.

    Args:
        repo_files: Iterable of dicts with a ``"content"`` key and, optionally,
            a ``"filename"`` key.

    Returns:
        str: ``"🧰 Detected Tech Stack: a, b, ..."`` with the names sorted, or
        ``"No clear tech stack detected."`` when nothing matched.
    """
    all_text = " ".join(f["content"] for f in repo_files).lower()
    all_names = " ".join(f.get("filename", "") for f in repo_files).lower()
    tech_keywords = {
        "python": ["def ", "import ", "flask", "django", "fastapi"],
        "javascript": ["function", "const ", "react", "vue", "next.js", "node"],
        "typescript": ["typescript", "tsconfig", ".tsx"],
        "java": ["public class", "springboot", "maven", "gradle"],
        "go": ["package main", "func main", "go mod"],
        "html": ["<html", "<body", "<div"],
        "css": ["color:", "background", "font-family"],
        # "from " used to be listed here, which tagged every file containing
        # a Python/JS import as Docker; only Docker-specific terms remain.
        "docker": ["dockerfile", "docker-compose", "docker build", "docker run"],
        "database": ["sql", "select", "insert", "mongodb", "mongoose", "prisma"],
        "ai/ml": ["torch", "tensorflow", "sklearn", "transformers", "model"],
    }
    stack = {
        tech
        for tech, keywords in tech_keywords.items()
        if any(k in all_text for k in keywords)
    }
    # A Dockerfile / compose file in the listing is the strongest Docker signal.
    if "dockerfile" in all_names or "docker-compose" in all_names:
        stack.add("docker")
    if not stack:
        return "No clear tech stack detected."
    return "🧰 Detected Tech Stack: " + ", ".join(sorted(stack))
108
-
109
- # ---------------------------------
110
- # Code Search
111
- # ---------------------------------
112
def search_code_snippets(repo_files, query, top_k=3):
    """Return the best-matching code snippets for a free-text *query*.

    A line matches when it contains the whole query (case-insensitive) or at
    least one query keyword (words of three or more characters, minus a small
    list of filler words).  Each hit is returned with two lines of context on
    either side.  Hits are ranked by number of matched keywords (a full-phrase
    hit ranks highest), then by snippet length.

    Args:
        repo_files: Iterable of dicts with ``"filename"`` and ``"content"``.
        query: Search text, e.g. the user's chat message.
        top_k: Maximum number of snippets to return.

    Returns:
        str: Snippets formatted as ``"📄 <filename>:\\n<snippet>"`` joined by
        blank lines, or ``""`` when the query is blank or nothing matched.
    """
    phrase = query.strip().lower()
    if not phrase or top_k <= 0:
        # An empty query would otherwise match every line of every file.
        return ""

    filler = {
        "the", "and", "for", "with", "this", "that", "what", "where", "which",
        "how", "does", "are", "can", "you", "explain", "show", "find", "code",
    }
    terms = {
        t for t in re.findall(r"[a-z0-9_]+", phrase)
        if len(t) >= 3 and t not in filler
    }

    scored = []
    for file in repo_files:
        lines = file["content"].splitlines()
        for i, line in enumerate(lines):
            lowered = line.lower()
            score = sum(1 for t in terms if t in lowered)
            if phrase in lowered:
                score += len(terms) + 1  # exact phrase beats any keyword mix
            if score == 0:
                continue
            snippet = "\n".join(lines[max(0, i - 2):i + 3])
            scored.append((score, len(snippet), file["filename"], snippet))

    if not scored:
        return ""
    scored.sort(key=lambda m: (m[0], m[1]), reverse=True)
    return "\n\n".join(
        f"📄 {name}:\n{snippet}" for _, _, name, snippet in scored[:top_k]
    )
127
-
128
- # ---------------------------------
129
- # OpenRouter API
130
- # ---------------------------------
131
def call_openrouter(messages, model="google/gemma-3-27b-it:free"):
    """Send a chat-completion request to OpenRouter and return the reply text.

    Failures are never raised; they are returned as a string starting with
    ``"⚠️ API Error:"`` so the chat UI can display them.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        model: OpenRouter model identifier.

    Returns:
        str: The assistant's reply, or an error description.
    """
    if not OPENROUTER_API_KEY:
        # Without this check the request is sent with "Bearer None".
        return "⚠️ API Error: OPENROUTER_API_KEY is not set."
    try:
        res = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json"
            },
            json={"model": model, "messages": messages},
            timeout=180
        )
        res.raise_for_status()
        payload = res.json()
    except Exception as e:
        return f"⚠️ API Error: {e}"

    try:
        content = payload["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        return "⚠️ API Error: unexpected response format."
    if content is None:
        return "⚠️ API Error: empty response from model."
    return content
146
-
147
- # ---------------------------------
148
- # Chat Logic
149
- # ---------------------------------
150
def create_system_prompt(repo_context=None):
    """Build the system message that opens every conversation.

    Args:
        repo_context: Optional repository description; when truthy, its first
            8000 characters are appended to the base instructions.

    Returns:
        dict: ``{"role": "system", "content": <prompt text>}``.
    """
    pieces = [
        "You are RepoGPT — a helpful AI assistant for GitHub repositories. ",
        "You can explain code, summarize architecture, identify frameworks, search code, and generate ideas. ",
        "Be conversational and beginner-friendly.",
    ]
    if repo_context:
        pieces.append(f"\n\nRepository context:\n{repo_context[:8000]}")
    return {"role": "system", "content": "".join(pieces)}
159
-
160
def load_repository(repo_url):
    """Clone a GitHub repository, read its files and summarise it.

    The clone is always deleted before returning.  On success the
    concatenated file contents are stored in ``MEMORY["repo_context"]`` and
    persisted.

    Args:
        repo_url: ``http(s)://github.com/<owner>/<repo>`` URL.

    Returns:
        tuple: ``(status_message, context, repo_files)`` where *context* is the
        detected stack plus the names of up to 50 files, and *repo_files* is
        the list of file dicts (empty on any failure).
    """
    global _last_active_time
    _last_active_time = time.time()

    repo_url = (repo_url or "").strip()
    # Require an actual GitHub repository URL; a bare substring test would let
    # arbitrary strings (including option-like ones) reach `git clone`.
    if not re.fullmatch(r"https?://(www\.)?github\.com/[\w.-]+/[\w.-]+/?", repo_url):
        return "⚠️ Please provide a valid GitHub repository URL.", "", []

    repo_dir = None
    try:
        repo_dir = clone_repo(repo_url)
        repo_files = read_repo_files(repo_dir)
        stack_info = detect_tech_stack(repo_files)
        file_summary = "\n".join(f["filename"] for f in repo_files[:50])
        context = f"{stack_info}\n\nFiles found:\n{file_summary}"
        MEMORY["repo_context"] = "\n".join(f["content"] for f in repo_files)
        save_memory(MEMORY)
        return f"✅ Repo loaded successfully!\n\n{stack_info}", context, repo_files
    except Exception as e:
        return f"❌ Failed to load repo: {e}", "", []
    finally:
        if repo_dir and os.path.exists(repo_dir):
            shutil.rmtree(repo_dir, ignore_errors=True)
180
 
181
def chat_with_repo(message, history, repo_context, repo_files):
    """Answer one chat message about the loaded repository.

    Builds the prompt from the system message, the last six turns of
    *history* and the new message (optionally extended with matching code
    snippets), calls the model, and persists the updated conversation.

    Args:
        message: The user's new message.
        history: Previous turns as ``(user, assistant)`` pairs; may be None.
        repo_context: Repository summary passed to the system prompt.
        repo_files: File dicts used for the snippet search; may be empty.

    Returns:
        str: The model's reply (or an error string from the API helper).
    """
    global _last_active_time
    _last_active_time = time.time()

    # Work on a copy: the caller's list belongs to the UI and must not be
    # aliased into MEMORY and mutated here.
    past_turns = list(history or [])
    MEMORY["chat_history"] = past_turns
    save_memory(MEMORY)

    code_snippet = ""
    wants_code = any(
        k in message.lower()
        for k in ["function", "class", "api", "code", "def ", "explain"]
    )
    if wants_code and repo_files:
        code_snippet = search_code_snippets(repo_files, message)

    messages = [create_system_prompt(repo_context)]
    for user, bot in past_turns[-6:]:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": bot})
    if code_snippet:
        message += f"\n\nRelevant Code Snippet:\n{code_snippet}"
    messages.append({"role": "user", "content": message})

    reply = call_openrouter(messages)
    past_turns.append((message, reply))
    save_memory(MEMORY)
    return reply
204
-
205
- # ---------------------------------
206
- # Gradio UI
207
- # ---------------------------------
208
# Build the RepoGPT page: a repo loader row on top, a chat interface below.
with gr.Blocks(theme=gr.themes.Soft(), title="RepoGPT") as demo:
    gr.Markdown("""
    # 🤖 RepoGPT
    **ChatGPT for GitHub Repositories**

    🧩 Features:
    - 🔍 Smart Code Search
    - 💾 Persistent Memory (auto-clears after 10 min)
    - 🧠 Tech-Stack Detection
    """)

    with gr.Row():
        repo_input = gr.Textbox(
            label="🔗 GitHub Repository URL",
            placeholder="https://github.com/yourname/yourproject",
        )
        load_btn = gr.Button("📂 Load Repo")

    repo_status = gr.Markdown("ℹ️ No repository loaded yet.")
    # State starts from whatever context was restored from persisted memory.
    repo_context_state = gr.State(MEMORY.get("repo_context", ""))
    repo_files_state = gr.State([])

    # load_repository already returns (status, context, files) in output order.
    load_btn.click(
        load_repository,
        inputs=[repo_input],
        outputs=[repo_status, repo_context_state, repo_files_state],
    )

    chatbot = gr.ChatInterface(
        fn=chat_with_repo,
        additional_inputs=[repo_context_state, repo_files_state],
        title="RepoGPT",
        description="Your AI companion for understanding and improving GitHub projects.",
        examples=[
            ["Summarize this repo."],
            ["Explain the backend structure."],
            ["Find where login is implemented."],
            ["What technologies are used here?"],
            ["Generate a LinkedIn post for this project."],
        ],
    )

if __name__ == "__main__":
    # The listening port comes from $PORT, defaulting to 7860.
    port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port, share=False)
 
1
+ import os, shutil, tempfile, re, json
2
+ from pathlib import Path
3
  import gradio as gr
4
+ from git import Repo
5
+ import requests
6
 
7
+ # ---------------- CONFIG ----------------
 
 
 
8
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
9
# Model requested from OpenRouter (a free-tier model).
OPENROUTER_MODEL = "nvidia/nemotron-nano-12b-v2-vl:free"
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Headers sent with every OpenRouter request; built once at import time.
HEADERS = {
    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
    "Content-Type": "application/json",
}

# File suffixes (lower-case, with the dot) that are read into the context.
ALLOWED_EXT = {
    # docs / notebooks
    ".md", ".txt", ".ipynb",
    # source code
    ".py", ".js", ".jsx", ".ts", ".tsx", ".java", ".kt", ".c", ".cpp", ".cs",
    ".go", ".rs", ".rb", ".php", ".sql",
    # web
    ".html", ".css",
    # configuration / data
    ".yml", ".yaml", ".toml", ".ini", ".json",
}

# Directory names that are never descended into while walking a repo.
SKIP_DIRS = {
    ".git", ".idea", ".vscode", ".cache", ".next",
    "node_modules", "venv", ".venv", "__pycache__",
    "dist", "build", "out", "target", "bin", "obj",
}

# Files larger than this many bytes are skipped.
MAX_FILE_BYTES = 800 * 1000
26
+
27
+ # ---------------- REPO UTILITIES ----------------
28
def clone_repo(url: str) -> Path:
    """Shallow-clone *url* (depth 1) into a new temporary directory.

    Args:
        url: Repository URL handed to ``Repo.clone_from``.

    Returns:
        Path: Resolved path of the directory holding the clone.  The caller is
        responsible for deleting it.

    Raises:
        Exception: Whatever ``Repo.clone_from`` raises; the temporary
        directory is removed before the exception propagates.
    """
    d = Path(tempfile.mkdtemp(prefix=".tmp_repo_")).resolve()
    try:
        Repo.clone_from(url, d, depth=1)
    except BaseException:
        # The caller never learns the path when the clone fails, so clean up
        # here or the directory is leaked.
        shutil.rmtree(d, ignore_errors=True)
        raise
    return d
32
+
33
def read_repo_text(repo_dir: Path) -> str:
    """Concatenate the text of all allowed files under *repo_dir*.

    Directories named in ``SKIP_DIRS`` are not descended into.  A file is
    included when its suffix is in ``ALLOWED_EXT``, its size does not exceed
    ``MAX_FILE_BYTES`` and it contains non-whitespace text.  Files that cannot
    be stat'ed or read are skipped.  Directories and files are visited in
    sorted order so the result is the same on every run.

    Args:
        repo_dir: Root of the checked-out repository.

    Returns:
        str: Sections of the form ``"\\n=== FILE: <relative path> ===\\n<text>"``
        joined by newlines; ``""`` when nothing qualified.
    """
    buf = []
    for root, dirs, files in os.walk(repo_dir):
        # In-place assignment prunes the walk; sorting makes it deterministic.
        dirs[:] = sorted(x for x in dirs if x not in SKIP_DIRS)
        for f in sorted(files):
            p = Path(root) / f
            if p.suffix.lower() not in ALLOWED_EXT:
                continue
            try:
                # stat() is inside the try: a broken symlink or permission
                # error must skip this file, not abort the whole scan.
                if p.stat().st_size > MAX_FILE_BYTES:
                    continue
                txt = p.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                continue
            if txt.strip():
                rel = str(p.relative_to(repo_dir))
                buf.append(f"\n=== FILE: {rel} ===\n{txt}")
    return "\n".join(buf)
48
+
49
def analyze_repo(url: str):
    """Clone the repository at *url* and return its text plus a status line.

    The temporary checkout is removed before returning, whatever happens.

    Args:
        url: Repository URL; must start with ``http://`` or ``https://``
            after surrounding whitespace is stripped.

    Returns:
        tuple: ``(text, status)``.  *text* is the concatenated repository text
        on success and ``None`` on any failure; *status* is a message for the
        user.
    """
    cleaned = url.strip() if url else ""
    if re.match(r"^https?://", cleaned) is None:
        return None, "❌ Invalid URL"

    checkout = None
    try:
        checkout = clone_repo(cleaned)
        corpus = read_repo_text(checkout)
        if corpus.strip():
            # Size is reported in thousands of characters.
            return corpus, f" Repo loaded successfully ({len(corpus) // 1000} KB of text)"
        return None, "⚠️ No readable text files found"
    except Exception as e:
        return None, f"❌ Error: {e}"
    finally:
        if checkout and Path(checkout).exists():
            shutil.rmtree(checkout, ignore_errors=True)
65
 
66
+ # ---------------- OPENROUTER CLIENT ----------------
67
def openrouter_chat(system_prompt, user_prompt, context=""):
    """Send one question to OpenRouter and return the model's answer.

    Failures are never raised; they are returned as a string starting with
    ``"[OpenRouter error]"`` or ``"[OpenRouter]"`` so the UI can show them.

    Args:
        system_prompt: Instructions sent as the first system message.
        user_prompt: The user's question.
        context: Optional repository text, sent as a second system message.

    Returns:
        str: The stripped reply text, or an error description.
    """
    if not OPENROUTER_API_KEY:
        # HEADERS would otherwise carry "Bearer None".
        return "[OpenRouter error] OPENROUTER_API_KEY is not set."

    messages = [{"role": "system", "content": system_prompt}]
    if context:
        messages.append({"role": "system", "content": f"Repository context:\n{context}"})
    messages.append({"role": "user", "content": user_prompt})

    payload = {"model": OPENROUTER_MODEL, "messages": messages}
    try:
        r = requests.post(OPENROUTER_URL, headers=HEADERS, json=payload, timeout=120)
        r.raise_for_status()
        obj = r.json()
    except Exception as e:
        return f"[OpenRouter error] {e}"

    if isinstance(obj, dict):
        # Surface an error object in the body instead of reporting a generic
        # format problem.
        err = obj.get("error")
        if err:
            detail = err.get("message", err) if isinstance(err, dict) else err
            return f"[OpenRouter error] {detail}"
        choices = obj.get("choices")
        if choices:
            try:
                content = choices[0]["message"]["content"]
            except (KeyError, IndexError, TypeError):
                content = None
            # Guard against null content before calling .strip().
            if isinstance(content, str):
                return content.strip()
    return "[OpenRouter] Unexpected response format."
84
+
85
+ # ---------------- CHAT LOGIC ----------------
86
# Instructions sent as the first system message of every request.
SYSTEM_PROMPT = (
    "You are an expert developer assistant. You help users explore and understand "
    "a GitHub repository. Base every response strictly on the repo's content and structure. "
    "If unsure, say so. Explain clearly and concisely. Avoid hallucinating."
)


def chat_repo(user_msg, chat_history, repo_text):
    """Handle one chat turn and return the updated history.

    The history list is extended in place (and also returned) so that a
    caller holding the same list sees the new turn.

    Args:
        user_msg: Text typed by the user.
        chat_history: List of ``(user, assistant)`` tuples; ``None`` is
            treated as an empty history.
        repo_text: Repository text from a previous analysis, or a falsy value
            when no repository has been analyzed yet.

    Returns:
        tuple: ``(chat_history, "")`` — the second element clears the input box.
    """
    if chat_history is None:
        chat_history = []

    # Ignore blank submissions instead of sending an empty prompt to the API.
    if not user_msg or not user_msg.strip():
        return chat_history, ""

    if not repo_text:
        # Keep the user's message visible next to the hint.
        chat_history.append((user_msg, "Please analyze a repository first."))
        return chat_history, ""

    # Truncate context for performance (first ~120k chars)
    context = repo_text[:120000]
    response = openrouter_chat(SYSTEM_PROMPT, user_msg, context)
    chat_history.append((user_msg, response))
    return chat_history, ""
102
+
103
+ # ---------------- GRADIO UI ----------------
104
# Page layout: URL row, status line, chat window, input box and clear button.
with gr.Blocks(title="Repo Chatbot · OpenRouter") as demo:
    gr.Markdown(
        """
        # 🤖 Repo Chatbot — powered by OpenRouter
        Chat with your GitHub repository!
        Upload a repo URL and ask anything about its **code, structure, or design**.
        _(No embeddings, just pure context reasoning.)_
        """
    )

    # Per-session state: the analyzed repository text and the conversation.
    repo_state = gr.State()
    chat_history = gr.State([])

    with gr.Row():
        repo_url = gr.Textbox(
            label="GitHub repo URL",
            placeholder="https://github.com/owner/repo",
            scale=4
        )
        analyze_btn = gr.Button("🔍 Analyze Repo", scale=1)

    status_box = gr.Markdown()

    chatbot = gr.Chatbot(label="Repo Chatbot", height=500)
    user_box = gr.Textbox(label="Type your question about the repo and press Enter")

    clear_btn = gr.Button("🧹 Clear Chat")

    # ---- CALLBACKS ----
    def analyze_repo_cb(url):
        """Run the analysis and return (repo text, status message)."""
        text, status = analyze_repo(url)
        return text, status

    def chat_cb(user_msg, history, repo_text):
        """Run one chat turn; return values for chatbot, history state, input box."""
        history, box_value = chat_repo(
            user_msg, history if history is not None else [], repo_text
        )
        return history, history, box_value

    analyze_btn.click(analyze_repo_cb, inputs=[repo_url], outputs=[repo_state, status_box])
    # The history state is an explicit output so it is always stored back.
    user_box.submit(chat_cb, inputs=[user_box, chat_history, repo_state],
                    outputs=[chatbot, chat_history, user_box])
    # Clearing must reset the stored history too, otherwise the old
    # conversation reappears with the next message.
    clear_btn.click(lambda: ([], [], ""), None, [chatbot, chat_history, user_box])

# Gradio 4 replaced queue(concurrency_count=...) with default_concurrency_limit;
# fall back to the old keyword on releases that do not know the new one.
try:
    demo.queue(default_concurrency_limit=3)
except TypeError:
    demo.queue(concurrency_count=3)

# Honour $PORT when the host provides one; default to 7860.
demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))