PraneshJs committed on
Commit
07f1ef8
verified
1 Parent(s): 56f90c1

added files to hf space

Browse files
Files changed (2) hide show
  1. app.py +117 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, shutil, tempfile, re
2
+ from pathlib import Path
3
+ from dotenv import load_dotenv
4
+ import gradio as gr
5
+ from git import Repo
6
+ from langchain_openai import AzureChatOpenAI
7
+
8
+ load_dotenv()
9
+ API_KEY=os.getenv("AZURE_OPENAI_API_KEY")
10
+ ENDPOINT=os.getenv("AZURE_OPENAI_ENDPOINT")
11
+ CHAT_DEPLOY=os.getenv("AZURE_OPENAI_DEPLOYMENT")
12
+ API_VERSION=os.getenv("AZURE_OPENAI_VERSION")
13
+
14
+ ALLOWED_EXT={".py",".ipynb",".md",".txt",".js",".ts",".tsx",".jsx",".java",".kt",".c",".cpp",".cs",".go",".rs",".rb",".php",".sql",".html",".css",".yml",".yaml",".toml",".ini",".json"}
15
+ SKIP_DIRS={"node_modules",".git","dist","build","out","venv",".venv","__pycache__",".next",".cache","target","bin","obj",".idea",".vscode"}
16
+ MAX_FILE_BYTES=800_000
17
+
18
def clone_repo(url):
    """Shallow-clone the repository at *url* into a fresh temp directory.

    Returns the resolved path of the checkout; the caller owns cleanup.
    """
    checkout = Path(tempfile.mkdtemp(prefix=".tmp_repo_")).resolve()
    # depth=1: history is irrelevant, only the working tree is read.
    Repo.clone_from(url, checkout, depth=1)
    return checkout
22
+
23
def read_repo_text(repo_dir: Path, allowed_ext=None, skip_dirs=None, max_file_bytes=None):
    """Concatenate the text of all readable source files under *repo_dir*.

    Walks the tree (pruning *skip_dirs*), reads every file whose suffix is in
    *allowed_ext* and whose size is at most *max_file_bytes*, and prefixes
    each with a ``=== FILE: <relpath> ===`` header so the LLM can attribute
    content to files.

    Args:
        repo_dir: Root of the checked-out repository.
        allowed_ext: Lowercase extensions to include; defaults to ALLOWED_EXT.
        skip_dirs: Directory names to prune; defaults to SKIP_DIRS.
        max_file_bytes: Per-file size cap in bytes; defaults to MAX_FILE_BYTES.

    Returns:
        One string with all matching file contents, "" if nothing matched.
    """
    if allowed_ext is None:
        allowed_ext = ALLOWED_EXT
    if skip_dirs is None:
        skip_dirs = SKIP_DIRS
    if max_file_bytes is None:
        max_file_bytes = MAX_FILE_BYTES
    buf = []
    for root, dirs, files in os.walk(repo_dir):
        # Prune in place so os.walk never descends into skipped directories;
        # sort both lists so output order is deterministic across filesystems.
        dirs[:] = sorted(d for d in dirs if d not in skip_dirs)
        for fname in sorted(files):
            p = Path(root) / fname
            if p.suffix.lower() not in allowed_ext:
                continue
            try:
                # stat() inside the try: the original crashed the whole walk
                # on broken symlinks / permission errors at this point.
                if p.stat().st_size > max_file_bytes:
                    continue
                txt = p.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                continue  # best-effort harvest: skip unreadable entries
            if txt.strip():
                rel = str(p.relative_to(repo_dir))
                buf.append(f"\n=== FILE: {rel} ===\n{txt}")
    return "\n".join(buf)
38
+
39
def analyze_repo(url):
    """Clone the repo at *url*, harvest its text, and always clean up.

    Returns a ``(repo_text, status_message)`` pair; ``repo_text`` is None on
    any failure (bad URL, clone error, or no readable files).
    """
    cleaned = (url or "").strip()
    if not re.match(r"^https?://", cleaned):
        return None, "Invalid URL"
    repo_dir = None
    try:
        repo_dir = clone_repo(cleaned)
        text = read_repo_text(repo_dir)
    except Exception as e:
        return None, f"Error: {e}"
    finally:
        # Remove the temporary checkout on every path, success or failure.
        if repo_dir and Path(repo_dir).exists():
            shutil.rmtree(repo_dir, ignore_errors=True)
    if not text.strip():
        return None, "No readable text files found"
    return text, "Ready. Repo text loaded."
51
+
52
def make_llm(temp=0.8):
    """Build an AzureChatOpenAI client from the module-level env settings.

    Args:
        temp: Sampling temperature for the chat model.
    """
    client_config = {
        "deployment_name": CHAT_DEPLOY,
        "azure_endpoint": ENDPOINT,
        "api_key": API_KEY,
        "api_version": API_VERSION,
        "temperature": temp,
    }
    return AzureChatOpenAI(**client_config)
60
+
61
def generate_qa_from_context(repo_text, n_questions=10):
    """Ask the LLM for *n_questions* repo-grounded interview Q&A pairs.

    Args:
        repo_text: Concatenated repository text (truncated to 100k chars to
            stay within the model's context window).
        n_questions: Number of Q/A pairs to request.

    Returns:
        The model's raw text response in ``Q1:/A1:`` format.
    """
    # Low temperature: answers should stay factual and grounded in the repo.
    llm = make_llm(0.2)
    # NOTE: fixed mojibake in the original prompt ("candidate鈥檚" -> "candidate's")
    # and a broken sentence ("surface-level question Keep" -> two sentences).
    sys = ("You are a principal engineer conducting a rigorous technical interview about a specific GitHub repository. "
           "Ask only realistic, challenging interview questions directly grounded in the repo's actual code, configuration, and design choices. "
           "Focus on probing the candidate's reasoning behind trade-offs. "
           "Why they chose one tool, library, or framework over alternatives. "
           "Why this approach is better or worse compared to others. "
           "How decisions impact performance, scalability, testing, security, and maintainability. "
           "Do not ask generic or surface-level questions. "
           "Keep questions precise, technical, and focused on why this, not that reasoning.")
    fmt = ("Using only the repo context below, produce {n} Q&A pairs.\n"
           "Context:\n\"\"\"\n{ctx}\n\"\"\"\n"
           "Format strictly:\nQ1: ...\nA1: ...\nQ2: ...\nA2: ...\n... up to Q{n}/A{n}.\n"
           "Do not add extra commentary.")
    prompt = fmt.format(n=n_questions, ctx=repo_text[:100000])
    res = llm.invoke([{"role": "system", "content": sys}, {"role": "user", "content": prompt}])
    return res.content
71
+
72
def ask_one(repo_text, topic):
    """Generate ONE tough repo-specific Q&A pair, optionally themed by *topic*."""
    ctx = repo_text[:6000]  # small slice keeps the single-question prompt cheap
    focus = topic or "most critical part of this repository"
    sys = ("You are a senior interviewer. Ask ONE tough, repo-specific question, then give the detailed answer.")
    usr = (f"Repo context:\n\"\"\"\n{ctx}\n\"\"\"\n"
           f"Focus: {focus}\n"
           "Output format:\nQ: <question>\nA: <answer>\nNo preamble.")
    model = make_llm(0.2)
    reply = model.invoke([{"role": "system", "content": sys}, {"role": "user", "content": usr}])
    return reply.content
81
+
82
def on_analyze(url):
    """Gradio handler: pass the (repo_text, status) pair straight through."""
    return analyze_repo(url)
85
+
86
def on_generate(repo_text, n):
    """Gradio handler: emit *n* Q&A pairs; requires an analyzed repo first."""
    if repo_text:
        return generate_qa_from_context(repo_text, int(n))
    return "Please analyze a repo first."
89
+
90
def on_ask_one(repo_text, topic):
    """Gradio handler: one focused Q&A pair; requires an analyzed repo first."""
    if repo_text:
        return ask_one(repo_text, topic or "")
    return "Please analyze a repo first."
93
+
94
# --- Gradio UI -------------------------------------------------------------
# NOTE: fixed mojibake in the window title ("路" is a GBK mis-decode of "·").
with gr.Blocks(title="Repo Interview Prep · Azure OpenAI (No Embeddings)") as demo:
    gr.Markdown("# Repo Interview Prep\nPaste a GitHub repo URL. Get **real interview questions** grounded in its code, with **model answers**. (No embeddings used)")
    # Holds the concatenated repo text between click events.
    repo_state = gr.State()
    with gr.Row():
        repo_url = gr.Textbox(label="GitHub repo URL", placeholder="https://github.com/owner/repo")
    with gr.Row():
        analyze_btn = gr.Button("Analyze Repo")
        analyze_status = gr.Markdown()
    with gr.Row():
        nq = gr.Slider(5, 20, step=1, value=10, label="Number of Q&A")
        gen_btn = gr.Button("Generate Q&A")
    qa_out = gr.Markdown(label="Q&A")
    gr.Markdown("### Ask One Question")
    with gr.Row():
        topic = gr.Textbox(label="Optional focus (e.g., auth, DB, CI/CD)")
        ask_btn = gr.Button("Ask One")
    single_out = gr.Markdown()

    # Wiring: analyze fills repo_state; the other handlers read from it.
    analyze_btn.click(on_analyze, inputs=[repo_url], outputs=[repo_state, analyze_status])
    gen_btn.click(on_generate, inputs=[repo_state, nq], outputs=[qa_out])
    ask_btn.click(on_ask_one, inputs=[repo_state, topic], outputs=[single_out])

if __name__ == "__main__":
    demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ python-dotenv
2
+ gradio
3
+ gitpython
4
+ langchain
5
+ langchain-openai