arun3676 commited on
Commit
ff3ea6b
·
1 Parent(s): 5c6379b

Add Hugging Face integration and remote model support

Browse files

- Created Hugging Face Space files (app.py, requirements.txt, README.md)
- Enhanced optimized_code_analyzer_enhanced.py with remote API support
- Updated matrix_final.py with model selection UI
- Added remote model configuration and connection testing
- Support for both local CodeT5+ and remote fine-tuned DeepSeek models

hf-space/README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Fine-tuned Code Analyzer API
3
+ emoji: 🤖
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # Fine-tuned Code Analyzer API
11
+
12
+ API endpoint for code analysis using fine-tuned DeepSeek model.
13
+
14
+ ## Features
15
+
16
+ - **Quality Scores**: 1-100 rating for code quality
17
+ - **Structured Analysis**: Bugs, Performance, Security sections
18
+ - **Code Improvements**: Specific suggestions with examples
19
+ - **Professional Output**: Consistent, detailed analysis format
20
+
21
+ ## Usage
22
+
23
+ ### POST /analyze
24
+
25
+ Analyze code for bugs, performance, and security issues.
26
+
27
+ **Request:**
28
+ ```json
29
+ {
30
+ "code": "def fibonacci(n):\n if n <= 1:\n return n\n return fibonacci(n-1) + fibonacci(n-2)",
31
+ "max_tokens": 300
32
+ }
33
+ ```
34
+
35
+ **Response:**
36
+ ```json
37
+ {
38
+ "analysis": "Quality Score: 35/100\n\nBUGS:\n- No error handling\n- Infinite recursion possible\n\nPERFORMANCE ISSUES:\n- Recursive calls cause exponential time complexity\n\nSECURITY CONCERNS:\n- No input validation\n\nIMPROVEMENTS:\n1. Use memoization to avoid redundant recursive calls\n2. Add input validation\n\nExample improved code:\n[Shows working fixes]",
39
+ "model": "fine-tuned-deepseek",
40
+ "status": "success"
41
+ }
42
+ ```
43
+
44
+ ### GET /health
45
+
46
+ Health check endpoint.
47
+
48
+ **Response:**
49
+ ```json
50
+ {
51
+ "status": "healthy",
52
+ "model": "fine-tuned-deepseek"
53
+ }
54
+ ```
55
+
56
+ ## Model Details
57
+
58
+ - **Base Model**: DeepSeek Coder 1.3B
59
+ - **Training Method**: LoRA (Low-Rank Adaptation)
60
+ - **Dataset**: 59+ high-quality code analysis examples
61
+ - **Fine-tuned for**: Code analysis, bug detection, performance optimization
hf-space/app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ from peft import PeftModel
5
+ import torch
6
+ import uvicorn
7
+
8
+ app = FastAPI()
9
+
10
+ # Load model once at startup
11
+ tokenizer = None
12
+ model = None
13
+
14
+ @app.on_event("startup")
15
+ async def load_model():
16
+ global tokenizer, model
17
+ print("🚀 Loading fine-tuned model...")
18
+
19
+ tokenizer = AutoTokenizer.from_pretrained(
20
+ "deepseek-ai/deepseek-coder-1.3b-instruct",
21
+ trust_remote_code=True
22
+ )
23
+ tokenizer.pad_token = tokenizer.eos_token
24
+
25
+ base_model = AutoModelForCausalLM.from_pretrained(
26
+ "deepseek-ai/deepseek-coder-1.3b-instruct",
27
+ torch_dtype=torch.float16,
28
+ device_map="auto",
29
+ trust_remote_code=True,
30
+ )
31
+
32
+ model = PeftModel.from_pretrained(
33
+ base_model,
34
+ "arun3676/fine-tuned-code-analyzer"
35
+ )
36
+ print("✅ Model loaded successfully!")
37
+
38
+ class CodeRequest(BaseModel):
39
+ code: str
40
+ max_tokens: int = 300
41
+
42
+ class AnalysisResponse(BaseModel):
43
+ analysis: str
44
+ model: str
45
+ status: str
46
+
47
+ @app.post("/analyze", response_model=AnalysisResponse)
48
+ async def analyze_code(request: CodeRequest):
49
+ try:
50
+ prompt = f"<s>[INST] Analyze this code for bugs, performance, and security issues. Give a quality score from 1-100 and provide a detailed analysis.\n\nCode:\n```\n{request.code}\n``` [/INST]"
51
+
52
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
53
+
54
+ with torch.no_grad():
55
+ outputs = model.generate(
56
+ **inputs,
57
+ max_new_tokens=request.max_tokens,
58
+ temperature=0.7,
59
+ do_sample=True,
60
+ pad_token_id=tokenizer.eos_token_id,
61
+ eos_token_id=tokenizer.eos_token_id,
62
+ )
63
+
64
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
65
+ analysis = response.split('[/INST]')[-1].strip()
66
+
67
+ return AnalysisResponse(
68
+ analysis=analysis,
69
+ model="fine-tuned-deepseek",
70
+ status="success"
71
+ )
72
+ except Exception as e:
73
+ raise HTTPException(status_code=500, detail=str(e))
74
+
75
+ @app.get("/health")
76
+ async def health_check():
77
+ return {"status": "healthy", "model": "fine-tuned-deepseek"}
78
+
79
+ @app.get("/")
80
+ async def root():
81
+ return {
82
+ "message": "Fine-tuned Code Analyzer API",
83
+ "endpoints": {
84
+ "POST /analyze": "Analyze code for bugs, performance, and security issues",
85
+ "GET /health": "Health check endpoint"
86
+ },
87
+ "model": "fine-tuned-deepseek"
88
+ }
89
+
90
+ if __name__ == "__main__":
91
+ uvicorn.run(app, host="0.0.0.0", port=7860)
hf-space/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ peft
5
+ torch
6
+ accelerate
7
+ requests
matrix_final.py CHANGED
@@ -5,7 +5,7 @@ import random
5
  import sys
6
  from dotenv import load_dotenv
7
  from analyzer import CodeAnalyzer
8
- from optimized_code_analyzer import OptimizedCodeAnalyzer
9
 
10
  # Load environment variables
11
  load_dotenv()
@@ -281,15 +281,21 @@ def get_analyzer():
281
 
282
  analyzer = get_analyzer()
283
 
284
- # Local CodeT5+ analyzer (cached)
285
  @st.cache_resource
286
- def get_local_analyzer():
287
- return OptimizedCodeAnalyzer(
288
- model_id="Salesforce/codet5p-220m",
289
- precision="fp16", # fastest from benchmark
290
- quick_max_new_tokens=180,
291
- detailed_max_new_tokens=240,
292
- )
 
 
 
 
 
 
293
 
294
  def display_matrix_analysis_result(result: dict, model_name: str):
295
  """Display analysis result in clean, readable horizontal blocks."""
@@ -613,16 +619,52 @@ with st.sidebar:
613
  format_func=lambda x: f"📝 {x}" if x == "Code Analysis" else f"📦 {x}"
614
  )
615
 
616
- # Local model toggle and preset
617
- use_local = st.checkbox("💻 Use Local CodeT5+ (no external API)", value=False)
618
- local_preset = st.selectbox(
619
- "Local Inference Mode",
620
- ["Quick", "Detailed"],
621
- index=0,
622
- help="Quick = beams 1, ~180 tokens. Detailed = beams 2, ~240 tokens.",
623
- disabled=not use_local,
 
624
  )
625
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
626
  if analysis_mode == "GitHub Repository":
627
  st.markdown("#### Repository Analysis")
628
  github_url = st.text_input(
@@ -855,10 +897,20 @@ with col2:
855
 
856
  else:
857
  # Single model analysis
858
- if use_local:
859
- st.markdown("#### 🤖 CODET5+_LOCAL_ANALYSIS")
860
- local = get_local_analyzer()
861
- if local_preset == "Quick":
 
 
 
 
 
 
 
 
 
 
862
  result = local.analyze_code_fast(code_input, mode="quick")
863
  # adapt to display format
864
  display_matrix_analysis_result({
@@ -871,10 +923,10 @@ with col2:
871
  "language": "auto",
872
  "line_count": len(code_input.splitlines()),
873
  "raw_response": result["analysis"],
874
- }, "CodeT5+ Local (Quick)")
875
  else:
876
  # streaming path – consume generator and show final
877
- local = get_local_analyzer()
878
  final_text = None
879
  for chunk in local.analyze_code_streaming(code_input, show_progress=True, mode="detailed"):
880
  final_text = chunk
@@ -888,7 +940,7 @@ with col2:
888
  "language": "auto",
889
  "line_count": len(code_input.splitlines()),
890
  "raw_response": final_text or "",
891
- }, "CodeT5+ Local (Detailed)")
892
  else:
893
  st.markdown(f"#### 🤖 {available_models[selected_model].upper()}_ANALYSIS")
894
  result = analyzer.analyze_code(
 
5
  import sys
6
  from dotenv import load_dotenv
7
  from analyzer import CodeAnalyzer
8
+ from optimized_code_analyzer_enhanced import EnhancedCodeAnalyzer
9
 
10
  # Load environment variables
11
  load_dotenv()
 
281
 
282
  analyzer = get_analyzer()
283
 
284
+ # Local analyzer (cached)
285
  @st.cache_resource
286
+ def get_local_analyzer(model_type="codet5", remote_url=None):
287
+ if remote_url:
288
+ return EnhancedCodeAnalyzer(
289
+ model_type="deepseek-finetuned-remote",
290
+ remote_api_url=remote_url
291
+ )
292
+ else:
293
+ return EnhancedCodeAnalyzer(
294
+ model_type=model_type,
295
+ precision="fp16",
296
+ quick_max_new_tokens=180,
297
+ detailed_max_new_tokens=300,
298
+ )
299
 
300
  def display_matrix_analysis_result(result: dict, model_name: str):
301
  """Display analysis result in clean, readable horizontal blocks."""
 
619
  format_func=lambda x: f"📝 {x}" if x == "Code Analysis" else f"📦 {x}"
620
  )
621
 
622
+ # Model Selection
623
+ st.markdown("#### 🤖 AI Model Selection")
624
+ model_choice = st.radio(
625
+ "Choose Analysis Model:",
626
+ [
627
+ "CodeT5+ (Fast - Local)",
628
+ "Fine-tuned DeepSeek (Accurate - Remote)"
629
+ ],
630
+ help="Local models run on your computer, Remote model runs on Hugging Face (always available)"
631
  )
632
 
633
+ # Remote model configuration
634
+ remote_api_url = None
635
+ if "Remote" in model_choice:
636
+ st.markdown("#### 🌐 Remote Model Configuration")
637
+ remote_api_url = st.text_input(
638
+ "Hugging Face Space URL:",
639
+ value="https://arun3676-fine-tuned-code-analyzer.hf.space",
640
+ help="Your Hugging Face Space URL"
641
+ )
642
+
643
+ # Test connection
644
+ if st.button("🔗 Test Connection"):
645
+ try:
646
+ import requests
647
+ response = requests.get(f"{remote_api_url}/health", timeout=5)
648
+ if response.status_code == 200:
649
+ st.success("✅ Connected to remote model!")
650
+ else:
651
+ st.error("❌ Connection failed")
652
+ except:
653
+ st.error("❌ Cannot reach remote model")
654
+
655
+ # Local model toggle and preset (for CodeT5+)
656
+ if "CodeT5+" in model_choice:
657
+ use_local = True
658
+ local_preset = st.selectbox(
659
+ "Local Inference Mode",
660
+ ["Quick", "Detailed"],
661
+ index=0,
662
+ help="Quick = beams 1, ~180 tokens. Detailed = beams 2, ~240 tokens.",
663
+ )
664
+ else:
665
+ use_local = False
666
+ local_preset = "Detailed"
667
+
668
  if analysis_mode == "GitHub Repository":
669
  st.markdown("#### Repository Analysis")
670
  github_url = st.text_input(
 
897
 
898
  else:
899
  # Single model analysis
900
+ if use_local or "Remote" in model_choice:
901
+ # Determine model type and display name
902
+ if "Remote" in model_choice:
903
+ st.markdown("#### 🤖 FINE-TUNED_DEEPSEEK_REMOTE_ANALYSIS")
904
+ model_type = "deepseek-finetuned-remote"
905
+ display_name = "Fine-tuned DeepSeek (Remote)"
906
+ else:
907
+ st.markdown("#### 🤖 CODET5+_LOCAL_ANALYSIS")
908
+ model_type = "codet5"
909
+ display_name = "CodeT5+ Local"
910
+
911
+ local = get_local_analyzer(model_type, remote_api_url)
912
+
913
+ if local_preset == "Quick" or "Remote" in model_choice:
914
  result = local.analyze_code_fast(code_input, mode="quick")
915
  # adapt to display format
916
  display_matrix_analysis_result({
 
923
  "language": "auto",
924
  "line_count": len(code_input.splitlines()),
925
  "raw_response": result["analysis"],
926
+ }, f"{display_name} (Quick)")
927
  else:
928
  # streaming path – consume generator and show final
929
+ local = get_local_analyzer(model_type, remote_api_url)
930
  final_text = None
931
  for chunk in local.analyze_code_streaming(code_input, show_progress=True, mode="detailed"):
932
  final_text = chunk
 
940
  "language": "auto",
941
  "line_count": len(code_input.splitlines()),
942
  "raw_response": final_text or "",
943
+ }, f"{display_name} (Detailed)")
944
  else:
945
  st.markdown(f"#### 🤖 {available_models[selected_model].upper()}_ANALYSIS")
946
  result = analyzer.analyze_code(
optimized_code_analyzer_enhanced.py CHANGED
@@ -39,6 +39,7 @@ class EnhancedCodeAnalyzer:
39
  precision: str = "fp16",
40
  quick_max_new_tokens: int = 180,
41
  detailed_max_new_tokens: int = 300,
 
42
  ):
43
  """
44
  Initialize the enhanced analyzer.
@@ -77,6 +78,7 @@ class EnhancedCodeAnalyzer:
77
  self.model = None
78
  self.tokenizer = None
79
  self.cache = {}
 
80
 
81
  # Create cache directory
82
  os.makedirs(cache_dir, exist_ok=True)
@@ -345,6 +347,10 @@ Code:
345
  Returns:
346
  Dict: Analysis result
347
  """
 
 
 
 
348
  # Check cache first
349
  cached_result = self._check_cache(code)
350
  if cached_result:
@@ -459,6 +465,51 @@ Code:
459
 
460
  return min(score, 100)
461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  def get_model_info(self) -> Dict[str, Any]:
463
  """Get information about the loaded model."""
464
  if self.model is None:
 
39
  precision: str = "fp16",
40
  quick_max_new_tokens: int = 180,
41
  detailed_max_new_tokens: int = 300,
42
+ remote_api_url: Optional[str] = None,
43
  ):
44
  """
45
  Initialize the enhanced analyzer.
 
78
  self.model = None
79
  self.tokenizer = None
80
  self.cache = {}
81
+ self.remote_api_url = remote_api_url
82
 
83
  # Create cache directory
84
  os.makedirs(cache_dir, exist_ok=True)
 
347
  Returns:
348
  Dict: Analysis result
349
  """
350
+ # Check if using remote model
351
+ if self.remote_api_url:
352
+ return self.analyze_code_remote(code, mode)
353
+
354
  # Check cache first
355
  cached_result = self._check_cache(code)
356
  if cached_result:
 
465
 
466
  return min(score, 100)
467
 
468
+ def analyze_code_remote(self, code: str, mode: str = "quick") -> Dict[str, Any]:
469
+ """Analyze code using remote Hugging Face API."""
470
+ import requests
471
+
472
+ if not self.remote_api_url:
473
+ raise ValueError("No remote API URL configured")
474
+
475
+ cached_result = self._check_cache(code)
476
+ if cached_result:
477
+ cached_result["cached"] = True
478
+ return cached_result
479
+
480
+ start_time = time.time()
481
+
482
+ try:
483
+ max_tokens = self.quick_max_new_tokens if mode == "quick" else self.detailed_max_new_tokens
484
+
485
+ response = requests.post(
486
+ f"{self.remote_api_url}/analyze",
487
+ json={"code": code, "max_tokens": max_tokens},
488
+ timeout=60
489
+ )
490
+ response.raise_for_status()
491
+
492
+ data = response.json()
493
+ analysis_text = data["analysis"]
494
+
495
+ quality_score = self._calculate_quality_score(analysis_text)
496
+ total_time = time.time() - start_time
497
+
498
+ result = {
499
+ "analysis": analysis_text,
500
+ "quality_score": quality_score,
501
+ "execution_time": total_time,
502
+ "model": "fine-tuned-deepseek-remote",
503
+ "model_type": "deepseek-finetuned-remote",
504
+ "cached": False
505
+ }
506
+
507
+ self._save_to_cache(code, result)
508
+ return result
509
+
510
+ except Exception as e:
511
+ raise Exception(f"Remote analysis failed: {e}")
512
+
513
  def get_model_info(self) -> Dict[str, Any]:
514
  """Get information about the loaded model."""
515
  if self.model is None: