Spaces:

luminoria
/

GitSpec

Sleeping

GitSpec / streamlitUI.py

Lumiin0us

GitSpec Initial Deployment

34b79cd about 2 months ago

8.54 kB

	import streamlit as st
	from backend.clone import cloneRepo, cleanupRepo
	from backend.crawl import repoCrawler
	from backend.extract import processPythonFile
	from backend.indexer import indexer
	from backend.historyExtractor import extractHistory
	from backend.historyIndexer import indexHistory
	from backend.router import routeQuery
	from groq import Groq
	from dotenv import load_dotenv
	import os

	# Pull variables from a local .env file into the process environment, then
	# read the Groq key from it (None when the variable is not set).
	load_dotenv()
	groqApiKey = os.environ.get("GROQ_API_KEY")

	st.set_page_config(page_title="GitSpec", layout="wide")
	st.title("GitSpec")

	# Seed the per-session state once; Streamlit reruns this script on every
	# interaction, so existing values must not be overwritten.
	if "client" not in st.session_state:
	    for stateKey, initialValue in (("client", None), ("model", None), ("repo_name", "")):
	        st.session_state[stateKey] = initialValue

	# Chat transcript: a list of {"role": ..., "content": ...} dicts.
	if "messages" not in st.session_state:
	    st.session_state["messages"] = []

	# Sidebar controls, written in object notation: every st.sidebar.<element>
	# call renders inside the sidebar container.
	st.sidebar.header("Settings")

	# Drop the stored conversation and rerun the script so the cleared chat
	# is reflected immediately.
	if st.sidebar.button("Clear Chat History"):
	    st.session_state["messages"] = []
	    st.rerun()

	st.sidebar.divider()

	# Number of snippets requested from each search index per question
	# (range 3-10, default 6).
	contextLimit = st.sidebar.slider(
	    "Context Depth (Snippets)",
	    min_value=3,
	    max_value=10,
	    value=6,
	)

	repoUrl = st.text_input("Enter GitHub Repository URL:")

	# Repository ingestion: clone -> scan -> extract -> index code -> extract
	# history -> index history. On success the resulting client/model pair is
	# stored in session state, which is what enables the chat section.
	if st.button("Analyze Repository"):
	    # Pasted URLs often carry stray whitespace; trim before validating/cloning.
	    cleanUrl = repoUrl.strip()
	    if cleanUrl:
	        # Display name = last path segment. Ignore a trailing "/" and strip
	        # only a *trailing* ".git" (a plain replace would also mangle names
	        # such as "user.github.io").
	        repoName = cleanUrl.rstrip('/').split('/')[-1]
	        if repoName.endswith('.git'):
	            repoName = repoName[:-len('.git')]
	        st.session_state.repo_name = repoName

	        progressBar = st.progress(0)
	        statusText = st.empty()

	        statusText.text("Cloning repository...")
	        progressBar.progress(10)
	        destPath, repo = cloneRepo(cleanUrl)

	        if destPath and repo:
	            try:
	                progressBar.progress(25)
	                statusText.text("Scanning files...")
	                files = repoCrawler(destPath)

	                progressBar.progress(40)
	                statusText.text("Extracting code structures...")
	                results = processPythonFile(files, destPath, repo)

	                progressBar.progress(55)
	                statusText.text("Indexing code...")
	                client, model = indexer(results)

	                progressBar.progress(70)
	                statusText.text("Extracting commit history...")
	                commitsFile = extractHistory(repo, destPath)

	                progressBar.progress(85)
	                statusText.text("Indexing history...")
	                client, model = indexHistory(commitsFile, client, model)

	                # Publish the new index only after every step succeeded, and
	                # start a fresh conversation for the newly indexed repository.
	                st.session_state.client = client
	                st.session_state.model = model
	                st.session_state.messages = []

	                progressBar.progress(100)
	                statusText.text("Cleaning up...")
	            finally:
	                # Always remove the local clone, even when a pipeline step
	                # raises; otherwise failed runs leave clones on disk.
	                cleanupRepo(destPath)

	            statusText.empty()
	            progressBar.empty()
	            st.success(f"'{st.session_state.repo_name}' Repository indexed successfully")
	        else:
	            statusText.empty()
	            progressBar.empty()
	            st.error("Failed to clone repository.")
	    else:
	        st.warning("Please enter a URL.")

	def _formatCodeContext(payload):
	    """Render one code-search payload as a [CODE] context block for the prompt."""
	    # "history" / "lastCommit" may be missing or None; fall back to empty
	    # dicts so a sparse payload cannot raise.
	    lastCommit = (payload.get('history') or {}).get('lastCommit') or {}
	    return (
	        f"[CODE] FILE: {payload.get('filePath')}\n"
	        f"METADATA: Parent={payload.get('parentClass')}, Imports={payload.get('modules')}\n"
	        f"LAST COMMIT: {lastCommit.get('msg')}\n"
	        f"CODE:\n{payload.get('content')}"
	    )


	def _formatHistoryContext(payload):
	    """Render one history-search payload as a [HISTORY] context block for the prompt."""
	    # Keep the first 10 characters of the date (presumably the YYYY-MM-DD
	    # part of an ISO timestamp -- confirm against the history indexer).
	    commitDate = (payload.get('date') or '')[:10]
	    changedFiles = ', '.join(
	        str(change.get('file')) for change in (payload.get('changes') or [])
	    )
	    return (
	        f"[HISTORY] COMMIT: {payload.get('sha')} by {payload.get('author')} on {commitDate}\n"
	        f"SUMMARY: {payload.get('summary')}\n"
	        f"FILES: {changedFiles}\n"
	        f"CHANGES: {payload.get('embedText')}"
	    )


	# Chat section: rendered only while an index client is stored in session state.
	if st.session_state.client:
	    st.divider()

	    # Replay the stored conversation; the script reruns on every interaction.
	    for message in st.session_state.messages:
	        with st.chat_message(message["role"]):
	            st.markdown(message["content"])

	    if query := st.chat_input("Ask anything about the codebase or its history..."):

	        with st.chat_message("user"):
	            st.markdown(query)

	        # Snapshot the previous turns BEFORE recording the new question.
	        # Slicing after the append would include the question itself, and
	        # the LLM would receive it twice (bare, then again with context).
	        priorTurns = st.session_state.messages[-5:]
	        st.session_state.messages.append({"role": "user", "content": query})

	        with st.spinner("Thinking..."):

	            # Route the query
	            route = routeQuery(query)

	            queryVector = st.session_state.model.encode(query).tolist()
	            contextBlocks = []
	            searchResults = []

	            # Search the right index(es); contextLimit is the sidebar slider value.
	            if route in ("CODE", "BOTH"):
	                codeResponse = st.session_state.client.query_points(
	                    collection_name="tempCollection",
	                    query=queryVector,
	                    limit=contextLimit,
	                )
	                for res in codeResponse.points:
	                    contextBlocks.append(_formatCodeContext(res.payload or {}))
	                    searchResults.append(("code", res))

	            if route in ("HISTORY", "BOTH"):
	                historyResponse = st.session_state.client.query_points(
	                    collection_name="historyIndex",
	                    query=queryVector,
	                    limit=contextLimit,
	                )
	                for res in historyResponse.points:
	                    contextBlocks.append(_formatHistoryContext(res.payload or {}))
	                    searchResults.append(("history", res))

	            if not contextBlocks:
	                fullResponse = "I couldn't find relevant information. Try rephrasing your question."
	            else:
	                formattedContext = "\n---\n".join(contextBlocks)

	                systemPrompts = (
	                    "You are a Senior Software Architect and code historian for GitSpec. "
	                    "You have access to two knowledge sources: current source code (CODE) and git commit history (HISTORY). "
	                    "Use whichever is relevant to answer the question accurately."
	                    "\n\nSTRICT FORMATTING RULES:"
	                    "\n- Start immediately with the answer."
	                    "\n- Use H3 headers (###) for distinct sections."
	                    "\n- For history answers, always cite the commit SHA and author."
	                    "\n- For code answers, reference the file and function name."
	                    "\n- Do NOT include absolute local file paths."
	                    "\n- Keep the tone professional and concise."
	                    "\n- Be concise. No 'In conclusion' or summary sections — end when the answer is complete."
	                )

	                llm_messages = [
	                    {"role": "system", "content": systemPrompts},
	                    *priorTurns,
	                    {"role": "user", "content": f"Context:\n{formattedContext}\n\nQuestion: {query}"},
	                ]
	                try:
	                    # Built inside the try so that a client construction
	                    # failure (e.g. no API key configured) is reported in
	                    # the chat instead of crashing the app.
	                    groqClient = Groq(api_key=groqApiKey)
	                    llmResponse = groqClient.chat.completions.create(
	                        model="llama-3.3-70b-versatile",
	                        messages=llm_messages,
	                        temperature=0.1,
	                    )
	                    fullResponse = llmResponse.choices[0].message.content
	                except Exception as e:
	                    # UI boundary: turn any failure into a chat message.
	                    errorText = str(e)
	                    if "rate_limit_exceeded" in errorText.lower() or "413" in errorText:
	                        fullResponse = (
	                            "Rate Limit Reached: The context for this repository is quite large for the free tier. "
	                            "I've tried to answer, but Groq is busy. Please try: \n"
	                            "1. Reducing the Context Depth slider in the sidebar.\n"
	                            "2. Asking a more specific question.\n"
	                            "3. Waiting 60 seconds and trying again."
	                        )
	                    else:
	                        fullResponse = f"An unexpected error occurred: {errorText}"

	        # Display answer
	        with st.chat_message("assistant"):
	            st.markdown(fullResponse)
	            st.caption(f"Route: `{route}`")

	            if searchResults:
	                with st.expander("Explore Reference Sources"):
	                    for sourceType, res in searchResults:
	                        p = res.payload or {}
	                        if sourceType == "code":
	                            st.caption(f"[CODE] {p.get('filePath')}")
	                            st.code(p.get('content'), language='python')
	                        else:
	                            st.caption(f"[HISTORY] {p.get('sha')} — {p.get('summary')}")
	                            for change in p.get('changes') or []:
	                                st.caption(f"File: {change.get('file')} ({change.get('status')})")

	        st.session_state.messages.append({"role": "assistant", "content": fullResponse})