From 27dfe7298d0381d5f4095cee94a86cdd8667a9f1 Mon Sep 17 00:00:00 2001 From: Anibal Angulo Date: Tue, 3 Mar 2026 05:32:37 +0000 Subject: [PATCH] Add better logging --- cmd/gateway/main.go | 118 ++++++++++++++++++++++++---- config.example.yaml | 4 + config.test.yaml | 16 ++++ internal/config/config.go | 9 +++ internal/logger/logger.go | 59 ++++++++++++++ internal/providers/google/google.go | 14 ++-- internal/providers/providers.go | 4 +- internal/server/mocks_test.go | 8 +- internal/server/server.go | 70 ++++++++++++++--- 9 files changed, 263 insertions(+), 39 deletions(-) create mode 100644 config.test.yaml create mode 100644 internal/logger/logger.go diff --git a/cmd/gateway/main.go b/cmd/gateway/main.go index 0b0f6b1..80ba3d6 100644 --- a/cmd/gateway/main.go +++ b/cmd/gateway/main.go @@ -6,11 +6,13 @@ import ( "flag" "fmt" "log" + "log/slog" "net/http" "os" "time" _ "github.com/go-sql-driver/mysql" + "github.com/google/uuid" _ "github.com/jackc/pgx/v5/stdlib" _ "github.com/mattn/go-sqlite3" "github.com/redis/go-redis/v9" @@ -18,6 +20,7 @@ import ( "github.com/ajac-zero/latticelm/internal/auth" "github.com/ajac-zero/latticelm/internal/config" "github.com/ajac-zero/latticelm/internal/conversation" + slogger "github.com/ajac-zero/latticelm/internal/logger" "github.com/ajac-zero/latticelm/internal/providers" "github.com/ajac-zero/latticelm/internal/server" ) @@ -32,13 +35,23 @@ func main() { log.Fatalf("load config: %v", err) } + // Initialize logger from config + logFormat := cfg.Logging.Format + if logFormat == "" { + logFormat = "json" + } + logLevel := cfg.Logging.Level + if logLevel == "" { + logLevel = "info" + } + logger := slogger.New(logFormat, logLevel) + registry, err := providers.NewRegistry(cfg.Providers, cfg.Models) if err != nil { - log.Fatalf("init providers: %v", err) + logger.Error("failed to initialize providers", slog.String("error", err.Error())) + os.Exit(1) } - logger := log.New(os.Stdout, "gateway ", log.LstdFlags|log.Lshortfile) - // Initialize authentication middleware authConfig := auth.Config{ Enabled: cfg.Auth.Enabled, @@ -47,19 +60,21 @@ func main() { } authMiddleware, err := auth.New(authConfig) if err != nil { - log.Fatalf("init auth: %v", err) + logger.Error("failed to initialize auth", slog.String("error", err.Error())) + os.Exit(1) } if cfg.Auth.Enabled { - logger.Printf("Authentication enabled (issuer: %s)", cfg.Auth.Issuer) + logger.Info("authentication enabled", slog.String("issuer", cfg.Auth.Issuer)) } else { - logger.Printf("Authentication disabled - WARNING: API is publicly accessible") + logger.Warn("authentication disabled - API is publicly accessible") } // Initialize conversation store convStore, err := initConversationStore(cfg.Conversations, logger) if err != nil { - log.Fatalf("init conversation store: %v", err) + logger.Error("failed to initialize conversation store", slog.String("error", err.Error())) + os.Exit(1) } gatewayServer := server.New(registry, convStore, logger) @@ -82,13 +97,14 @@ func main() { IdleTimeout: 120 * time.Second, } - logger.Printf("Open Responses gateway listening on %s", addr) + logger.Info("open responses gateway listening", slog.String("address", addr)) if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { - logger.Fatalf("server error: %v", err) + logger.Error("server error", slog.String("error", err.Error())) + os.Exit(1) } } -func initConversationStore(cfg config.ConversationConfig, logger *log.Logger) (conversation.Store, error) { +func initConversationStore(cfg config.ConversationConfig, logger *slog.Logger) (conversation.Store, error) { var ttl time.Duration if cfg.TTL != "" { parsed, err := time.ParseDuration(cfg.TTL) @@ -112,7 +128,11 @@ func initConversationStore(cfg config.ConversationConfig, logger *log.Logger) (c if err != nil { return nil, fmt.Errorf("init sql store: %w", err) } - logger.Printf("Conversation store initialized (sql/%s, TTL: %s)", driver, ttl) + logger.Info("conversation store initialized", + slog.String("backend", "sql"), + slog.String("driver", driver), + slog.Duration("ttl", ttl), + ) return store, nil case "redis": opts, err := redis.ParseURL(cfg.DSN) @@ -128,17 +148,83 @@ func initConversationStore(cfg config.ConversationConfig, logger *log.Logger) (c return nil, fmt.Errorf("connect to redis: %w", err) } - logger.Printf("Conversation store initialized (redis, TTL: %s)", ttl) + logger.Info("conversation store initialized", + slog.String("backend", "redis"), + slog.Duration("ttl", ttl), + ) return conversation.NewRedisStore(client, ttl), nil default: - logger.Printf("Conversation store initialized (memory, TTL: %s)", ttl) + logger.Info("conversation store initialized", + slog.String("backend", "memory"), + slog.Duration("ttl", ttl), + ) return conversation.NewMemoryStore(ttl), nil } } -func loggingMiddleware(next http.Handler, logger *log.Logger) http.Handler { +type responseWriter struct { + http.ResponseWriter + statusCode int + bytesWritten int +} + +func (rw *responseWriter) WriteHeader(code int) { + rw.statusCode = code + rw.ResponseWriter.WriteHeader(code) +} + +func (rw *responseWriter) Write(b []byte) (int, error) { + n, err := rw.ResponseWriter.Write(b) + rw.bytesWritten += n + return n, err +} + +func loggingMiddleware(next http.Handler, logger *slog.Logger) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { start := time.Now() - next.ServeHTTP(w, r) - logger.Printf("%s %s %s", r.Method, r.URL.Path, time.Since(start)) + + // Generate request ID + requestID := uuid.NewString() + ctx := slogger.WithRequestID(r.Context(), requestID) + r = r.WithContext(ctx) + + // Wrap response writer to capture status code + rw := &responseWriter{ + ResponseWriter: w, + statusCode: http.StatusOK, + } + + // Add request ID header + w.Header().Set("X-Request-ID", requestID) + + // Log request start + logger.InfoContext(ctx, "request started", + slog.String("request_id", requestID), + slog.String("method", r.Method), + slog.String("path", r.URL.Path), + slog.String("remote_addr", r.RemoteAddr), + slog.String("user_agent", r.UserAgent()), + ) + + next.ServeHTTP(rw, r) + + duration := time.Since(start) + + // Log request completion with appropriate level + logLevel := slog.LevelInfo + if rw.statusCode >= 500 { + logLevel = slog.LevelError + } else if rw.statusCode >= 400 { + logLevel = slog.LevelWarn + } + + logger.Log(ctx, logLevel, "request completed", + slog.String("request_id", requestID), + slog.String("method", r.Method), + slog.String("path", r.URL.Path), + slog.Int("status_code", rw.statusCode), + slog.Int("response_bytes", rw.bytesWritten), + slog.Duration("duration", duration), + slog.Float64("duration_ms", float64(duration.Milliseconds())), + ) }) } diff --git a/config.example.yaml b/config.example.yaml index 2d25fa5..a0245d5 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -1,6 +1,10 @@ server: address: ":8080" +logging: + format: "json" # "json" for production, "text" for development + level: "info" # "debug", "info", "warn", or "error" + providers: google: type: "google" diff --git a/config.test.yaml b/config.test.yaml new file mode 100644 index 0000000..8f5f323 --- /dev/null +++ b/config.test.yaml @@ -0,0 +1,16 @@ +server: + address: ":8080" + +logging: + format: "text" # text format for easy reading in development + level: "debug" # debug level to see all logs + +providers: + mock: + type: "openai" + api_key: "test-key" + endpoint: "https://api.openai.com" + +models: + - name: "gpt-4o-mini" + provider: "mock" diff --git a/internal/config/config.go b/internal/config/config.go index 803e058..0bebbf8 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -14,6 +14,7 @@ type Config struct { Models []ModelEntry `yaml:"models"` Auth AuthConfig `yaml:"auth"` Conversations ConversationConfig `yaml:"conversations"` + Logging LoggingConfig `yaml:"logging"` } // ConversationConfig controls conversation storage. @@ -30,6 +31,14 @@ type ConversationConfig struct { Driver string `yaml:"driver"` } +// LoggingConfig controls logging format and level. +type LoggingConfig struct { + // Format is the log output format: "json" (default) or "text". + Format string `yaml:"format"` + // Level is the minimum log level: "debug", "info" (default), "warn", or "error". + Level string `yaml:"level"` +} + // AuthConfig holds OIDC authentication settings. type AuthConfig struct { Enabled bool `yaml:"enabled"` diff --git a/internal/logger/logger.go b/internal/logger/logger.go new file mode 100644 index 0000000..40a3a6e --- /dev/null +++ b/internal/logger/logger.go @@ -0,0 +1,59 @@ +package logger + +import ( + "context" + "log/slog" + "os" +) + +type contextKey string + +const requestIDKey contextKey = "request_id" + +// New creates a logger with the specified format (json or text) and level. +func New(format string, level string) *slog.Logger { + var handler slog.Handler + + logLevel := parseLevel(level) + opts := &slog.HandlerOptions{ + Level: logLevel, + AddSource: true, // Add file:line info for debugging + } + + if format == "json" { + handler = slog.NewJSONHandler(os.Stdout, opts) + } else { + handler = slog.NewTextHandler(os.Stdout, opts) + } + + return slog.New(handler) +} + +// parseLevel converts a string level to slog.Level. +func parseLevel(level string) slog.Level { + switch level { + case "debug": + return slog.LevelDebug + case "info": + return slog.LevelInfo + case "warn": + return slog.LevelWarn + case "error": + return slog.LevelError + default: + return slog.LevelInfo + } +} + +// WithRequestID adds a request ID to the context for tracing. +func WithRequestID(ctx context.Context, requestID string) context.Context { + return context.WithValue(ctx, requestIDKey, requestID) +} + +// FromContext extracts the request ID from context, or returns empty string. +func FromContext(ctx context.Context) string { + if id, ok := ctx.Value(requestIDKey).(string); ok { + return id + } + return "" +} diff --git a/internal/providers/google/google.go b/internal/providers/google/google.go index 7b43b76..4e4e567 100644 --- a/internal/providers/google/google.go +++ b/internal/providers/google/google.go @@ -21,7 +21,7 @@ type Provider struct { } // New constructs a Provider using the Google AI API with API key authentication. -func New(cfg config.ProviderConfig) *Provider { +func New(cfg config.ProviderConfig) (*Provider, error) { var client *genai.Client if cfg.APIKey != "" { var err error @@ -29,20 +29,19 @@ func New(cfg config.ProviderConfig) *Provider { APIKey: cfg.APIKey, }) if err != nil { - // Log error but don't fail construction - will fail on Generate - fmt.Printf("warning: failed to create google client: %v\n", err) + return nil, fmt.Errorf("failed to create google client: %w", err) } } return &Provider{ cfg: cfg, client: client, - } + }, nil } // NewVertexAI constructs a Provider targeting Vertex AI. // Vertex AI uses the same genai SDK but with GCP project/location configuration // and Application Default Credentials (ADC) or service account authentication. -func NewVertexAI(vertexCfg config.VertexAIConfig) *Provider { +func NewVertexAI(vertexCfg config.VertexAIConfig) (*Provider, error) { var client *genai.Client if vertexCfg.Project != "" && vertexCfg.Location != "" { var err error @@ -52,8 +51,7 @@ func NewVertexAI(vertexCfg config.VertexAIConfig) *Provider { Backend: genai.BackendVertexAI, }) if err != nil { - // Log error but don't fail construction - will fail on Generate - fmt.Printf("warning: failed to create vertex ai client: %v\n", err) + return nil, fmt.Errorf("failed to create vertex ai client: %w", err) } } return &Provider{ @@ -62,7 +60,7 @@ func NewVertexAI(vertexCfg config.VertexAIConfig) *Provider { APIKey: "", }, client: client, - } + }, nil } func (p *Provider) Name() string { return Name } diff --git a/internal/providers/providers.go b/internal/providers/providers.go index a22f8a4..245fdfc 100644 --- a/internal/providers/providers.go +++ b/internal/providers/providers.go @@ -97,7 +97,7 @@ func buildProvider(entry config.ProviderEntry) (Provider, error) { return googleprovider.New(config.ProviderConfig{ APIKey: entry.APIKey, Endpoint: entry.Endpoint, - }), nil + }) case "vertexai": if entry.Project == "" || entry.Location == "" { return nil, fmt.Errorf("project and location are required for vertexai") @@ -105,7 +105,7 @@ func buildProvider(entry config.ProviderEntry) (Provider, error) { return googleprovider.NewVertexAI(config.VertexAIConfig{ Project: entry.Project, Location: entry.Location, - }), nil + }) default: return nil, fmt.Errorf("unknown provider type %q", entry.Type) } diff --git a/internal/server/mocks_test.go b/internal/server/mocks_test.go index ad1557a..122937c 100644 --- a/internal/server/mocks_test.go +++ b/internal/server/mocks_test.go @@ -3,7 +3,7 @@ package server import ( "context" "fmt" - "log" + "log/slog" "reflect" "sync" "unsafe" @@ -250,8 +250,10 @@ func (m *mockLogger) getLogs() []string { return append([]string{}, m.logs...) } -func (m *mockLogger) asLogger() *log.Logger { - return log.New(m, "", 0) +func (m *mockLogger) asLogger() *slog.Logger { + return slog.New(slog.NewTextHandler(m, &slog.HandlerOptions{ + Level: slog.LevelDebug, + })) } func (m *mockLogger) Write(p []byte) (n int, err error) { diff --git a/internal/server/server.go b/internal/server/server.go index 621ba99..975b768 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -3,7 +3,7 @@ package server import ( "encoding/json" "fmt" - "log" + "log/slog" "net/http" "strings" "time" @@ -12,6 +12,7 @@ import ( "github.com/ajac-zero/latticelm/internal/api" "github.com/ajac-zero/latticelm/internal/conversation" + "github.com/ajac-zero/latticelm/internal/logger" "github.com/ajac-zero/latticelm/internal/providers" ) @@ -27,11 +28,11 @@ type ProviderRegistry interface { type GatewayServer struct { registry ProviderRegistry convs conversation.Store - logger *log.Logger + logger *slog.Logger } // New creates a GatewayServer bound to the provider registry. -func New(registry ProviderRegistry, convs conversation.Store, logger *log.Logger) *GatewayServer { +func New(registry ProviderRegistry, convs conversation.Store, logger *slog.Logger) *GatewayServer { return &GatewayServer{ registry: registry, convs: convs, @@ -94,11 +95,19 @@ func (s *GatewayServer) handleResponses(w http.ResponseWriter, r *http.Request) if req.PreviousResponseID != nil && *req.PreviousResponseID != "" { conv, err := s.convs.Get(*req.PreviousResponseID) if err != nil { - s.logger.Printf("error retrieving conversation: %v", err) + s.logger.ErrorContext(r.Context(), "failed to retrieve conversation", + slog.String("request_id", logger.FromContext(r.Context())), + slog.String("conversation_id", *req.PreviousResponseID), + slog.String("error", err.Error()), + ) http.Error(w, "error retrieving conversation", http.StatusInternalServerError) return } if conv == nil { + s.logger.WarnContext(r.Context(), "conversation not found", + slog.String("request_id", logger.FromContext(r.Context())), + slog.String("conversation_id", *req.PreviousResponseID), + ) http.Error(w, "conversation not found", http.StatusNotFound) return } @@ -140,7 +149,12 @@ func (s *GatewayServer) handleResponses(w http.ResponseWriter, r *http.Request) func (s *GatewayServer) handleSyncResponse(w http.ResponseWriter, r *http.Request, provider providers.Provider, providerMsgs []api.Message, resolvedReq *api.ResponseRequest, origReq *api.ResponseRequest, storeMsgs []api.Message) { result, err := provider.Generate(r.Context(), providerMsgs, resolvedReq) if err != nil { - s.logger.Printf("provider %s error: %v", provider.Name(), err) + s.logger.ErrorContext(r.Context(), "provider generation failed", + slog.String("request_id", logger.FromContext(r.Context())), + slog.String("provider", provider.Name()), + slog.String("model", resolvedReq.Model), + slog.String("error", err.Error()), + ) http.Error(w, "provider error", http.StatusBadGateway) return } @@ -155,10 +169,24 @@ func (s *GatewayServer) handleSyncResponse(w http.ResponseWriter, r *http.Reques } allMsgs := append(storeMsgs, assistantMsg) if _, err := s.convs.Create(responseID, result.Model, allMsgs); err != nil { - s.logger.Printf("error storing conversation: %v", err) + s.logger.ErrorContext(r.Context(), "failed to store conversation", + slog.String("request_id", logger.FromContext(r.Context())), + slog.String("response_id", responseID), + slog.String("error", err.Error()), + ) // Don't fail the response if storage fails } + s.logger.InfoContext(r.Context(), "response generated", + slog.String("request_id", logger.FromContext(r.Context())), + slog.String("provider", provider.Name()), + slog.String("model", result.Model), + slog.String("response_id", responseID), + slog.Int("input_tokens", result.Usage.InputTokens), + slog.Int("output_tokens", result.Usage.OutputTokens), + slog.Bool("has_tool_calls", len(result.ToolCalls) > 0), + ) + // Build spec-compliant response resp := s.buildResponse(origReq, result, provider.Name(), responseID) @@ -335,13 +363,20 @@ loop: } break loop case <-r.Context().Done(): - s.logger.Printf("client disconnected") + s.logger.InfoContext(r.Context(), "client disconnected", + slog.String("request_id", logger.FromContext(r.Context())), + ) return } } if streamErr != nil { - s.logger.Printf("stream error: %v", streamErr) + s.logger.ErrorContext(r.Context(), "stream error", + slog.String("request_id", logger.FromContext(r.Context())), + slog.String("provider", provider.Name()), + slog.String("model", origReq.Model), + slog.String("error", streamErr.Error()), + ) failedResp := s.buildResponse(origReq, &api.ProviderResult{ Model: origReq.Model, }, provider.Name(), responseID) @@ -477,9 +512,21 @@ loop: } allMsgs := append(storeMsgs, assistantMsg) if _, err := s.convs.Create(responseID, model, allMsgs); err != nil { - s.logger.Printf("error storing conversation: %v", err) + s.logger.ErrorContext(r.Context(), "failed to store conversation", + slog.String("request_id", logger.FromContext(r.Context())), + slog.String("response_id", responseID), + slog.String("error", err.Error()), + ) // Don't fail the response if storage fails } + + s.logger.InfoContext(r.Context(), "streaming response completed", + slog.String("request_id", logger.FromContext(r.Context())), + slog.String("provider", provider.Name()), + slog.String("model", model), + slog.String("response_id", responseID), + slog.Bool("has_tool_calls", len(toolCalls) > 0), + ) } } @@ -488,7 +535,10 @@ func (s *GatewayServer) sendSSE(w http.ResponseWriter, flusher http.Flusher, seq *seq++ data, err := json.Marshal(event) if err != nil { - s.logger.Printf("failed to marshal SSE event: %v", err) + s.logger.Error("failed to marshal SSE event", + slog.String("event_type", eventType), + slog.String("error", err.Error()), + ) return } fmt.Fprintf(w, "event: %s\ndata: %s\n\n", eventType, data)