2022-08-13 13:32:34 -05:00
// Copyright 2022 The Gitea Authors. All rights reserved.
2022-11-27 13:20:29 -05:00
// SPDX-License-Identifier: MIT
2022-01-06 20:18:52 -05:00
2022-08-13 13:32:34 -05:00
//go:generate go run invisible/generate.go -v -o ./invisible_gen.go
//go:generate go run ambiguous/generate.go -v -o ./ambiguous_gen.go ambiguous/ambiguous.json
2022-01-06 20:18:52 -05:00
package charset
import (
2023-12-17 09:38:54 -05:00
"html/template"
2022-01-06 20:18:52 -05:00
"io"
2024-02-21 16:18:44 -05:00
"slices"
2022-01-06 20:18:52 -05:00
"strings"
2022-08-13 13:32:34 -05:00
"code.gitea.io/gitea/modules/log"
2023-12-17 09:38:54 -05:00
"code.gitea.io/gitea/modules/setting"
2022-08-13 13:32:34 -05:00
"code.gitea.io/gitea/modules/translation"
2022-01-06 20:18:52 -05:00
)
2022-08-13 13:32:34 -05:00
// RuneNBSP is the codepoint for NBSP, the no-break space (U+00A0).
const RuneNBSP = 0xa0
2022-01-06 20:18:52 -05:00
2024-02-21 16:18:44 -05:00
// escapeContext names a place in which escaping can be applied. Its string
// value is what gets compared against the entries of
// setting.UI.SkipEscapeContexts.
type escapeContext string
// Keep this consistent with the documentation of [ui].SKIP_ESCAPE_CONTEXTS
// Defines the different contexts that could be used to escape in.
const (
// Wiki pages.
WikiContext escapeContext = "wiki"
// Rendered content (except markup), source code and blames.
FileviewContext escapeContext = "file-view"
// Commits or pull request's diff.
DiffContext escapeContext = "diff"
)
2022-08-13 13:32:34 -05:00
// EscapeControlHTML escapes the unicode control sequences in a provided html document
2024-02-21 16:18:44 -05:00
func EscapeControlHTML ( html template . HTML , locale translation . Locale , context escapeContext , allowed ... rune ) ( escaped * EscapeStatus , output template . HTML ) {
2022-01-06 20:18:52 -05:00
sb := & strings . Builder { }
2024-02-21 16:18:44 -05:00
escaped , _ = EscapeControlReader ( strings . NewReader ( string ( html ) ) , sb , locale , context , allowed ... ) // err has been handled in EscapeControlReader
2023-12-17 09:38:54 -05:00
return escaped , template . HTML ( sb . String ( ) )
2022-01-06 20:18:52 -05:00
}
2023-12-17 09:38:54 -05:00
// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
2024-02-21 16:18:44 -05:00
func EscapeControlReader ( reader io . Reader , writer io . Writer , locale translation . Locale , context escapeContext , allowed ... rune ) ( escaped * EscapeStatus , err error ) {
if ! setting . UI . AmbiguousUnicodeDetection || slices . Contains ( setting . UI . SkipEscapeContexts , string ( context ) ) {
2023-12-17 09:38:54 -05:00
_ , err = io . Copy ( writer , reader )
return & EscapeStatus { } , err
}
2022-08-13 13:32:34 -05:00
outputStream := & HTMLStreamerWriter { Writer : writer }
streamer := NewEscapeStreamer ( locale , outputStream , allowed ... ) . ( * escapeStreamer )
2022-01-06 20:18:52 -05:00
2022-08-13 13:32:34 -05:00
if err = StreamHTML ( reader , streamer ) ; err != nil {
streamer . escaped . HasError = true
log . Error ( "Error whilst escaping: %v" , err )
2022-01-06 20:18:52 -05:00
}
2022-08-13 13:32:34 -05:00
return streamer . escaped , err
2022-01-06 20:18:52 -05:00
}