smgc committed on
Commit
ca586bf
·
verified ·
1 Parent(s): f946611

Create server.go

Browse files
Files changed (1) hide show
  1. server.go +367 -0
server.go ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package server
2
+
3
+ import (
4
+ "context"
5
+ "crypto/subtle"
6
+ "encoding/json"
7
+ "fmt"
8
+ "net/http"
9
+ "regexp"
10
+ "strings"
11
+ "time"
12
+
13
+ "gcli2api/internal/codeassist"
14
+ "gcli2api/internal/config"
15
+ "gcli2api/internal/gemini"
16
+
17
+ // "gcli2api/internal/utils"
18
+
19
+ "github.com/sirupsen/logrus"
20
+ "github.com/tiktoken-go/tokenizer"
21
+ )
22
+
23
// Route patterns for the Gemini-compatible model endpoints. The single
// capture group extracts the model name from the URL path (everything
// between "/v1beta/models/" and the ":generateContent" /
// ":streamGenerateContent" action suffix).
var (
	modelPathUnary  = regexp.MustCompile(`^/v1beta/models/([^/]+):generateContent$`)
	modelPathStream = regexp.MustCompile(`^/v1beta/models/([^/]+):streamGenerateContent$`)
)
27
+
28
// CodeAssist abstracts the client for easier testing.
type CodeAssist interface {
	// GenerateContent performs a single (unary) generation request and
	// returns the complete upstream response.
	GenerateContent(ctx context.Context, model, project string, req gemini.GeminiRequest) (*gemini.GeminiAPIResponse, error)
	// GenerateContentStream starts a streaming generation request.
	// Response chunks arrive on the first channel; errors on the second.
	// NOTE(review): channel close/termination semantics are assumed to be
	// "output channel closes at end of stream" — confirm in the implementation.
	GenerateContentStream(ctx context.Context, model, project string, req gemini.GeminiRequest) (<-chan gemini.GeminiAPIResponse, <-chan error)
}
33
+
34
// Server is the HTTP front end that translates Gemini-style REST calls
// into Code Assist client calls.
type Server struct {
	cfg     config.Config // effective configuration (defaults applied by the constructors)
	httpCli *http.Client  // HTTP client handed to the real Code Assist client; nil when injected via NewWithCAClient
	caClient CodeAssist   // upstream client; an interface so tests can substitute a fake
	// sem is a simple semaphore for concurrency limiting
	sem chan struct{}
}
41
+
42
+ func New(cfg config.Config, httpCli *http.Client) *Server {
43
+ // Apply safe defaults when fields are zero to match config.LoadConfig behavior
44
+ if cfg.RequestMaxRetries == 0 {
45
+ cfg.RequestMaxRetries = 3
46
+ }
47
+ if cfg.RequestBaseDelayMillis == 0 {
48
+ cfg.RequestBaseDelayMillis = 1000
49
+ }
50
+ if cfg.RequestMaxBodyBytes == 0 {
51
+ cfg.RequestMaxBodyBytes = 16 * 1024 * 1024
52
+ }
53
+ if cfg.MaxConcurrentRequests == 0 {
54
+ cfg.MaxConcurrentRequests = 64
55
+ }
56
+ return &Server{
57
+ cfg: cfg,
58
+ httpCli: httpCli,
59
+ caClient: codeassist.NewCaClient(httpCli, cfg.RequestMaxRetries, time.Duration(cfg.RequestBaseDelayMillis)*time.Millisecond),
60
+ sem: make(chan struct{}, cfg.MaxConcurrentRequests),
61
+ }
62
+ }
63
+
64
+ // NewWithCAClient allows injecting a custom CodeAssist client (for tests).
65
+ func NewWithCAClient(cfg config.Config, ca CodeAssist) *Server {
66
+ // Apply same defaults as New to ensure handlers work in tests with zero config
67
+ if cfg.RequestMaxRetries == 0 {
68
+ cfg.RequestMaxRetries = 3
69
+ }
70
+ if cfg.RequestBaseDelayMillis == 0 {
71
+ cfg.RequestBaseDelayMillis = 1000
72
+ }
73
+ if cfg.RequestMaxBodyBytes == 0 {
74
+ cfg.RequestMaxBodyBytes = 16 * 1024 * 1024
75
+ }
76
+ if cfg.MaxConcurrentRequests == 0 {
77
+ cfg.MaxConcurrentRequests = 64
78
+ }
79
+ return &Server{cfg: cfg, caClient: ca, sem: make(chan struct{}, cfg.MaxConcurrentRequests)}
80
+ }
81
+
82
+ func (s *Server) Router() http.Handler {
83
+ mux := http.NewServeMux()
84
+ mux.HandleFunc("/health", s.handleHealth)
85
+ mux.HandleFunc("/v1beta/models", s.handleListModels)
86
+ mux.HandleFunc("/v1beta/models/", s.handleModel)
87
+ // Order: recover (outermost) -> logging -> concurrency limiter -> handlers
88
+ return s.withRecover(s.withLogging(s.withConcurrencyLimit(mux)))
89
+ }
90
+
91
+ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
92
+ w.Header().Set("Content-Type", "application/json")
93
+ w.WriteHeader(http.StatusOK)
94
+ _ = json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
95
+ }
96
+
97
+ func (s *Server) authorize(r *http.Request) bool {
98
+ key := s.cfg.AuthKey
99
+ if key == "" {
100
+ return true
101
+ }
102
+
103
+ // 检查 Authorization Header
104
+ if ah := r.Header.Get("Authorization"); ah != "" {
105
+ const p = "Bearer "
106
+ if strings.HasPrefix(ah, p) {
107
+ // Constant-time comparison to mitigate timing attacks
108
+ if 1 == subtle.ConstantTimeCompare([]byte(strings.TrimSpace(ah[len(p):])), []byte(key)) {
109
+ return true
110
+ }
111
+ }
112
+ }
113
+
114
+ // 检查 x-goog-api-key Header
115
+ if h := r.Header.Get("x-goog-api-key"); h != "" {
116
+ if 1 == subtle.ConstantTimeCompare([]byte(h), []byte(key)) {
117
+ return true
118
+ }
119
+ }
120
+
121
+ // ====== 新增改动开始 ======
122
+ // 检查 URL 查询参数中的 key
123
+ if qk := r.URL.Query().Get("key"); qk != "" {
124
+ if 1 == subtle.ConstantTimeCompare([]byte(qk), []byte(key)) {
125
+ return true
126
+ }
127
+ }
128
+ // ====== 新增改动结束 ======
129
+
130
+ return false
131
+ }
132
+
133
+
134
+ func (s *Server) handleListModels(w http.ResponseWriter, r *http.Request) {
135
+ if r.Method != http.MethodGet {
136
+ http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
137
+ return
138
+ }
139
+ if !s.authorize(r) {
140
+ http.Error(w, "unauthorized", http.StatusUnauthorized)
141
+ return
142
+ }
143
+ w.Header().Set("Content-Type", "application/json")
144
+ _ = json.NewEncoder(w).Encode(listModels())
145
+ }
146
+
147
+ func (s *Server) handleModel(w http.ResponseWriter, r *http.Request) {
148
+ if !s.authorize(r) {
149
+ http.Error(w, "unauthorized", http.StatusUnauthorized)
150
+ return
151
+ }
152
+ if r.Method != http.MethodPost {
153
+ http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
154
+ return
155
+ }
156
+ path := r.URL.Path
157
+ if m := modelPathUnary.FindStringSubmatch(path); m != nil {
158
+ model := m[1]
159
+ s.handleGenerateContent(model, w, r)
160
+ return
161
+ }
162
+ if m := modelPathStream.FindStringSubmatch(path); m != nil {
163
+ model := m[1]
164
+ s.handleStreamGenerateContent(model, w, r)
165
+ return
166
+ }
167
+ http.NotFound(w, r)
168
+ }
169
+
170
// validateModel reports whether model names one of the models this proxy
// serves, as defined by the gemini package's supported-model list.
func (s *Server) validateModel(model string) bool {
	return gemini.IsSupportedModel(model)
}
173
+
174
+ func (s *Server) decodeGeminiRequest(r *http.Request) (gemini.GeminiRequest, error) {
175
+ var req gemini.GeminiRequest
176
+ dec := json.NewDecoder(r.Body)
177
+ if err := dec.Decode(&req); err != nil {
178
+ return req, err
179
+ }
180
+ req = gemini.NormalizeGeminiRequest(req)
181
+ return req, nil
182
+ }
183
+
184
+ func (s *Server) handleGenerateContent(model string, w http.ResponseWriter, r *http.Request) {
185
+ if !s.validateModel(model) {
186
+ http.Error(w, "unknown model", http.StatusBadRequest)
187
+ return
188
+ }
189
+ // Limit request body size
190
+ r.Body = http.MaxBytesReader(w, r.Body, s.cfg.RequestMaxBodyBytes)
191
+ req, err := s.decodeGeminiRequest(r)
192
+ if err != nil {
193
+ http.Error(w, fmt.Sprintf("bad request: %v", err), http.StatusBadRequest)
194
+ return
195
+ }
196
+ // Enriched logging: model, thinking config, and total tokens
197
+ var thinking any
198
+ if req.GenerationConfig != nil {
199
+ thinking = req.GenerationConfig.ThinkingConfig
200
+ }
201
+ totalTokens := countRequestTokens(req)
202
+ logrus.WithFields(logrus.Fields{
203
+ "model": model,
204
+ "thinkingConfig": thinking,
205
+ "totalTokens": totalTokens,
206
+ }).Info("sending to upstream")
207
+ ctx, cancel := context.WithTimeout(r.Context(), 5*time.Minute)
208
+ defer cancel()
209
+ resp, err := s.caClient.GenerateContent(ctx, model, "", req)
210
+ if err != nil {
211
+ http.Error(w, err.Error(), httpStatusFromError(err))
212
+ return
213
+ }
214
+ w.Header().Set("Content-Type", "application/json")
215
+ _ = json.NewEncoder(w).Encode(resp)
216
+ }
217
+
218
// handleStreamGenerateContent serves :streamGenerateContent as a
// Server-Sent Events stream: each upstream chunk is emitted as a
// "data: <json>" event, and a terminal upstream error becomes an
// "event: error" frame before the stream ends.
func (s *Server) handleStreamGenerateContent(model string, w http.ResponseWriter, r *http.Request) {
	if !s.validateModel(model) {
		http.Error(w, "unknown model", http.StatusBadRequest)
		return
	}
	// Limit request body size
	r.Body = http.MaxBytesReader(w, r.Body, s.cfg.RequestMaxBodyBytes)
	req, err := s.decodeGeminiRequest(r)
	if err != nil {
		http.Error(w, fmt.Sprintf("bad request: %v", err), http.StatusBadRequest)
		return
	}
	// logrus.Infof("decoded request %s", utils.TruncateLongStringInObject(req, 100))
	// SSE requires incremental writes; without Flusher support we cannot stream.
	flusher, ok := w.(http.Flusher)
	if !ok {
		logrus.Warn("streaming unsupported")
		http.Error(w, "streaming unsupported", http.StatusInternalServerError)
		return
	}
	// SSE headers. X-Accel-Buffering disables proxy buffering (nginx).
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("X-Accel-Buffering", "no")

	// Derive a cancellable context so the upstream stream is torn down
	// when this handler returns for any reason.
	ctx, cancel := context.WithCancel(r.Context())
	defer cancel()
	out, errs := s.caClient.GenerateContentStream(ctx, model, "", req)

	// Prepare enriched logging: model, thinking config, and total tokens
	var thinking any
	if req.GenerationConfig != nil {
		thinking = req.GenerationConfig.ThinkingConfig
	}
	totalTokens := countRequestTokens(req)
	logrus.WithFields(logrus.Fields{
		"model":          model,
		"thinkingConfig": thinking,
		"totalTokens":    totalTokens,
	}).Info("sending to upstream")
	enc := json.NewEncoder(w)
	// Pump loop: forward chunks until the output channel closes, an
	// upstream error arrives, or the request context is cancelled.
	for {
		select {
		case g, ok := <-out:
			if !ok {
				// Upstream closed the output channel: end of stream.
				return
			}
			// SSE event - send raw response like TypeScript version
			if _, err := fmt.Fprint(w, "data: "); err != nil {
				logrus.Errorf("error writing data prefix: %v", err)
				return
			}
			if err := enc.Encode(g); err != nil {
				return
			}
			// enc.Encode writes a trailing newline; this second newline
			// completes the blank-line event terminator SSE requires.
			if _, err := fmt.Fprint(w, "\n"); err != nil {
				logrus.Errorf("error writing newline: %v", err)
				return
			}
			flusher.Flush()
		case e, ok := <-errs:
			// If the error channel is closed or yields a nil error,
			// treat it as a normal end-of-stream signal but continue
			// draining the output channel until it closes.
			if !ok || e == nil {
				// Disable further selects on errs to avoid busy looping on a closed channel
				// (a nil channel blocks forever in select).
				errs = nil
				continue
			}
			// Non-nil error: emit error event then end
			if _, err := fmt.Fprint(w, "event: error\n"); err != nil {
				logrus.Errorf("error writing error event: %v", err)
				return
			}
			if _, err := fmt.Fprintf(w, "data: {\"error\":{\"message\":%q}}\n\n", e.Error()); err != nil {
				logrus.Errorf("error writing error data: %v", err)
				return
			}
			flusher.Flush()
			return
		case <-ctx.Done():
			// Client disconnected or handler timed out upstream.
			return
		}
	}
}
304
+
305
+ // countRequestTokens approximates the total token count for the request
306
+ // by summing tokens of all text parts in systemInstruction and contents
307
+ // using tiktoken-go/tokenizer. We default to O200kBase encoding.
308
+ func countRequestTokens(req gemini.GeminiRequest) int {
309
+ enc, err := tokenizer.Get(tokenizer.O200kBase)
310
+ if err != nil {
311
+ return 0
312
+ }
313
+ total := 0
314
+ // system instruction ignored for token counting (feature removed)
315
+ // contents
316
+ for _, c := range req.Contents {
317
+ for _, p := range c.Parts {
318
+ if p.Text != "" {
319
+ if n, err := enc.Count(p.Text); err == nil {
320
+ total += n
321
+ }
322
+ }
323
+ }
324
+ }
325
+ return total
326
+ }
327
+
328
+ func httpStatusFromError(err error) int {
329
+ // Simple mapping; upstream errors already include status text sometimes.
330
+ s := err.Error()
331
+ if strings.Contains(s, "status 401") {
332
+ return http.StatusUnauthorized
333
+ }
334
+ if strings.Contains(s, "status 403") {
335
+ return http.StatusForbidden
336
+ }
337
+ if strings.Contains(s, "status 429") {
338
+ return http.StatusTooManyRequests
339
+ }
340
+ if strings.Contains(s, "status 5") {
341
+ return http.StatusBadGateway
342
+ }
343
+ return http.StatusBadRequest
344
+ }
345
+
346
+ func listModels() interface{} {
347
+ type model struct {
348
+ Name string `json:"name"`
349
+ Version string `json:"version"`
350
+ DisplayName string `json:"displayName"`
351
+ Description string `json:"description"`
352
+ SupportedGenerationMethods []string `json:"supportedGenerationMethods"`
353
+ }
354
+ out := struct {
355
+ Models []model `json:"models"`
356
+ }{Models: make([]model, 0, len(gemini.SupportedModels))}
357
+ for _, m := range gemini.SupportedModels {
358
+ out.Models = append(out.Models, model{
359
+ Name: "models/" + m.Name,
360
+ Version: "001",
361
+ DisplayName: m.DisplayName,
362
+ Description: m.Description,
363
+ SupportedGenerationMethods: []string{"generateContent", "streamGenerateContent"},
364
+ })
365
+ }
366
+ return out
367
+ }