Spaces:

slenk
/

codewraith

Sleeping

App Files Files Community

slenk committed on Apr 12

Commit

cf6c23e

verified ·

1 Parent(s): f93e07a

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +29 -72

app.py CHANGED Viewed

@@ -6,19 +6,19 @@ Set HF_REPO_ID environment variable to point to your uploaded adapter.
 from __future__ import annotations
-import json
 import os
 from pathlib import Path
 from typing import Any
 import gradio as gr
 import spaces
 # --- Config ---
-HF_REPO_ID = os.environ.get("HF_REPO_ID", "slenk/codewraith-lora-3b")
-MODEL_KEY = os.environ.get("MODEL_KEY", "3b")
 ADAPTER_DIR = "./adapter"
 MODELS = {
@@ -26,18 +26,6 @@ MODELS = {
     "8b": "unsloth/Llama-3.1-8B-Instruct",
 }
-# Duplicated here since spaces/app.py runs standalone on HF (can't import codewraith)
-SYSTEM_MESSAGE = (
-    "You are CodeWraith, a technical specification generator. "
-    "Given Python source code, produce a structured Markdown specification "
-    "that accurately captures all functions, classes, parameters, return types, "
-    "dependencies, and error handling patterns. "
-    "Include a mermaid diagram showing the relationships between classes and functions. "
-    "Use valid mermaid syntax with proper node IDs (no spaces or special characters in IDs). "
-    "Example: ```mermaid\ngraph TD\n    A[ModuleName] --> B[ClassName]\n"
-    "    B --> C[method_name]\n```"
-)
 EXAMPLE_CODE = '''\
 def fibonacci(n: int) -> list[int]:
     """Generate the first n Fibonacci numbers."""
@@ -106,69 +94,39 @@ def load_model() -> tuple[Any, Any]:
 def init_retriever():
-    """Initialize retriever if training data is bundled."""
     global _retriever  # noqa: PLW0603
     if _retriever is not None:
         return _retriever
-    index_path = Path("chromadb")
-    data_path = Path("training_pairs_clean.jsonl")
-    if not index_path.exists() and data_path.exists():
-        # Build index from bundled data
-        try:
-            import chromadb
-            from chromadb.utils import embedding_functions
-            client = chromadb.PersistentClient(path=str(index_path))
-            ef = embedding_functions.SentenceTransformerEmbeddingFunction(
-                model_name="all-MiniLM-L6-v2"
-            )
-            collection = client.get_or_create_collection(
-                name="codewraith_specs", embedding_function=ef
-            )
-            if collection.count() == 0:
-                pairs = []
-                with data_path.open() as f:
-                    for line in f:
-                        if line.strip():
-                            pairs.append(json.loads(line))
-                for i in range(0, len(pairs), 50):
-                    batch = pairs[i : i + 50]
-                    collection.add(
-                        ids=[f"pair_{i + j}" for j in range(len(batch))],
-                        documents=[p["input"] for p in batch],
-                        metadatas=[{"spec": p["output"]} for p in batch],
-                    )
-            _retriever = (client, collection, ef)
-        except Exception as e:
-            print(f"RAG init failed: {e}")
-    return _retriever
 def retrieve_context(source_code: str, n_results: int = 3) -> str:
     """Retrieve similar examples as context."""
-    ret = init_retriever()
-    if ret is None:
         return ""
-    _, collection, _ = ret
-    results = collection.query(query_texts=[source_code], n_results=n_results)
-    parts = ["Here are examples of Python code and their specifications:\n"]
-    for i, (doc, meta) in enumerate(zip(results["documents"][0], results["metadatas"][0]), 1):
-        parts.append(
-            f"\n--- Example {i} ---\n"
-            f"Code:\n```python\n{doc[:1500]}\n```\n"
-            f"Specification:\n{meta['spec'][:1500]}\n"
-        )
-    parts.append("\nNow generate a specification for the following code:\n")
-    return "".join(parts)
 # --- Inference ---
@@ -206,9 +164,8 @@ def generate_spec(
         inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
         input_len = inputs["input_ids"].shape[-1]
-        # Check if input is too long -- truncate RAG context if needed
         if input_len > 6000 and use_rag:
-            # Retry without RAG context
             messages = [
                 {"role": "system", "content": SYSTEM_MESSAGE},
                 {"role": "user", "content": source_code},
@@ -247,7 +204,7 @@ def create_app():
         gr.Markdown(
             "# CodeWraith\n"
             "Generate technical specifications from Python source code.\n\n"
-            "Paste your Python code on the left, adjust sampling parameters, "
             "and click **Generate Specification**."
         )
@@ -267,10 +224,10 @@ def create_app():
             clear_input_btn = gr.Button("Clear Input", variant="secondary")
             clear_output_btn = gr.Button("Clear Output", variant="secondary")
-        spec_output = gr.Markdown(label="Generated Specification")
         gr.Markdown("*Model loads on first generation (~30s). Subsequent calls are fast.*")
         loading_msg = "*Generating specification... (loading model if first run)*"
         generate_btn.click(
             fn=lambda: gr.update(value=loading_msg),
@@ -294,8 +251,8 @@ def create_app():
     return app
-# Preload model on startup (before GPU decorator kicks in)
-print("Preloading model and adapter...")
 download_adapter()
 print("Adapter ready. Model will load on first GPU request.")

 from __future__ import annotations
 import os
 from pathlib import Path
 from typing import Any
 import gradio as gr
 import spaces
+from codewraith import SYSTEM_MESSAGE
 # --- Config ---
+HF_REPO_ID = os.environ.get("HF_REPO_ID", "slenk/codewraith-lora-8b")
+MODEL_KEY = os.environ.get("MODEL_KEY", "8b")
 ADAPTER_DIR = "./adapter"
 MODELS = {
     "8b": "unsloth/Llama-3.1-8B-Instruct",
 }
 EXAMPLE_CODE = '''\
 def fibonacci(n: int) -> list[int]:
     """Generate the first n Fibonacci numbers."""
 def init_retriever():
+    """Initialize retriever if ChromaDB index exists."""
     global _retriever  # noqa: PLW0603
     if _retriever is not None:
         return _retriever
+    try:
+        from codewraith.app.retriever import SpecRetriever
+        retriever = SpecRetriever()
+        if Path("data/chromadb").exists():
+            collection = retriever._get_collection()
+            if collection.count() > 0:
+                _retriever = retriever
+                print(f"RAG retriever loaded ({collection.count()} examples)")
+                return _retriever
+    except ImportError:
+        pass
+    return None
 def retrieve_context(source_code: str, n_results: int = 3) -> str:
     """Retrieve similar examples as context."""
+    retriever = init_retriever()
+    if retriever is None:
         return ""
+    examples = retriever.retrieve(source_code, n_results=n_results)
+    if not examples:
+        return ""
+    return retriever.format_context(examples)
 # --- Inference ---
         inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
         input_len = inputs["input_ids"].shape[-1]
+        # Retry without RAG if input too long
         if input_len > 6000 and use_rag:
             messages = [
                 {"role": "system", "content": SYSTEM_MESSAGE},
                 {"role": "user", "content": source_code},
         gr.Markdown(
             "# CodeWraith\n"
             "Generate technical specifications from Python source code.\n\n"
+            "Paste your Python code below, adjust sampling parameters, "
             "and click **Generate Specification**."
         )
             clear_input_btn = gr.Button("Clear Input", variant="secondary")
             clear_output_btn = gr.Button("Clear Output", variant="secondary")
         gr.Markdown("*Model loads on first generation (~30s). Subsequent calls are fast.*")
+        spec_output = gr.Markdown(label="Generated Specification")
         loading_msg = "*Generating specification... (loading model if first run)*"
         generate_btn.click(
             fn=lambda: gr.update(value=loading_msg),
     return app
+# Preload adapter on startup (CPU time, free)
+print("Preloading adapter...")
 download_adapter()
 print("Adapter ready. Model will load on first GPU request.")