Header menu logo FSharp.Data

Anonymizing JSON

BinderScriptNotebook

This tutorial shows how to implement an anonymizer for a JSON document (represented using the JsonValue type discussed in the JSON parser article). This functionality is not directly available in the FSharp.Data package, but it can be implemented very easily by recursively walking over the JSON document.

If you want to use the JSON anonymizer in your code, you can copy the source from GitHub and just include it in your project. If you use these functions often and would like to see them in the FSharp.Data package, please submit a feature request.

DISCLAIMER: Don't rely on this to protect sensitive data; it is only a sample.

open System
open System.Globalization
open FSharp.Data

/// Replaces the leaf values of a JSON document with random data of the same shape.
///
/// Strings keep their length and punctuation: every digit becomes a random digit,
/// every letter becomes a random ASCII letter of the same case, and all other
/// characters are kept. Numbers keep their sign, decimal point and digit count.
/// Booleans, nulls, values inferred as bits (0/1) and strings inferred as dates
/// are passed through unchanged.
///
/// propertiesToSkip: names of record properties whose values are copied verbatim
/// (the whole sub-tree under such a property is left untouched).
/// valuesToSkip: string values that are copied verbatim wherever they occur.
type JsonAnonymizer(?propertiesToSkip, ?valuesToSkip) =

    let propertiesToSkip = Set.ofList (defaultArg propertiesToSkip [])
    let valuesToSkip = Set.ofList (defaultArg valuesToSkip [])

    let rng = Random()

    let digits = [| '0' .. '9' |]
    let lowerLetters = [| 'a' .. 'z' |]
    let upperLetters = [| 'A' .. 'Z' |]

    /// Maps a digit to a random digit and a letter to a random ASCII letter of the
    /// same case; any other character is returned unchanged.
    let getRandomChar (c: char) =
        if Char.IsDigit c then
            digits.[rng.Next(digits.Length)]
        elif Char.IsLetter c then
            if Char.IsLower c then
                lowerLetters.[rng.Next(lowerLetters.Length)]
            else
                upperLetters.[rng.Next(upperLetters.Length)]
        else
            c

    /// Applies getRandomChar to every character of the string.
    let randomize (str: string) =
        String(str.ToCharArray() |> Array.map getRandomChar)

    /// True when the inferred type is a primitive whose runtime type equals testType.
    let isType testType typ =
        match typ with
        | Runtime.StructuralTypes.InferedType.Primitive (typ, _, _, _) -> typ = testType
        | _ -> false

    /// True when the inferred type is one of the two bit types (Bit0 / Bit1).
    let isBit typ =
        typ |> isType typeof<Runtime.StructuralTypes.Bit0>
        || typ |> isType typeof<Runtime.StructuralTypes.Bit1>

    /// Runs FSharp.Data's primitive type inference on a piece of text, using the
    /// invariant culture and values-only inference.
    let inferType (text: string) =
        Runtime.StructuralInference.inferPrimitiveType
            Runtime.StructuralInference.defaultUnitsOfMeasureProvider
            Runtime.StructuralInference.InferenceMode'.ValuesOnly
            CultureInfo.InvariantCulture
            text
            None

    /// Randomizes the digits of an invariant-culture decimal literal and parses the
    /// result back. A literal with as many digits as Decimal.MaxValue can randomize
    /// to something out of range, so a failed parse simply draws new digits.
    let rec randomizeDecimal (text: string) =
        match Decimal.TryParse(randomize text, NumberStyles.Number, CultureInfo.InvariantCulture) with
        | true, value -> value
        | _ -> randomizeDecimal text

    /// Randomizes the mantissa digits of a float and keeps its exponent.
    /// The exponent part ("E+30") is split off before randomizing: passing it
    /// through randomize would replace the 'E' with a random letter and make the
    /// text unparseable. NaN and infinities have no digits and are returned as-is.
    let randomizeFloat (f: float) =
        if Double.IsNaN f || Double.IsInfinity f then
            f
        else
            let text = f.ToString("R", CultureInfo.InvariantCulture)

            let mantissa, exponent =
                match text.IndexOf('E') with
                | -1 -> text, ""
                | i -> text.Substring(0, i), text.Substring(i)

            // A mantissa close to Double.MaxValue can randomize to an overflow
            // (reported either as a failed parse or as infinity); draw again then.
            let rec attempt () =
                match Double.TryParse(randomize mantissa + exponent, NumberStyles.Float, CultureInfo.InvariantCulture) with
                | true, value when not (Double.IsInfinity value) -> value
                | _ -> attempt ()

            attempt ()

    /// Recursively walks the document and anonymizes every leaf value.
    let rec anonymize json =
        match json with
        | JsonValue.String s when valuesToSkip.Contains s -> json
        | JsonValue.String s ->
            let typ = inferType s

            (if typ |> isType typeof<Guid> then
                 // A GUID is replaced by a fresh GUID rather than by random characters.
                 Guid.NewGuid().ToString()
             elif isBit typ || typ |> isType typeof<DateTime> then
                 s
             else
                 // Keep a leading URL scheme so that links still look like links.
                 let prefix =
                     [ "http://"; "https://" ]
                     |> List.tryFind (fun scheme -> s.StartsWith(scheme, StringComparison.Ordinal))
                     |> Option.defaultValue ""

                 prefix + randomize (s.Substring(prefix.Length)))
            |> JsonValue.String
        | JsonValue.Number d ->
            // Format with the invariant culture so that the text matches the culture
            // used for inference and for parsing the randomized value back.
            let text = d.ToString(CultureInfo.InvariantCulture)

            if isBit (inferType text) then
                json
            else
                JsonValue.Number(randomizeDecimal text)
        | JsonValue.Float f -> JsonValue.Float(randomizeFloat f)
        | JsonValue.Boolean _
        | JsonValue.Null -> json
        | JsonValue.Record props ->
            props
            |> Array.map (fun (key, value) ->
                let newValue =
                    if propertiesToSkip.Contains key then
                        value
                    else
                        anonymize value

                key, newValue)
            |> JsonValue.Record
        | JsonValue.Array array -> array |> Array.map anonymize |> JsonValue.Array

    /// Returns an anonymized copy of the given JSON document.
    member _.Anonymize json = anonymize json

// Load the sample document from the "data" folder that sits next to this script's
// folder. Path.Combine inserts the directory separator that plain string
// concatenation with __SOURCE_DIRECTORY__ (which has no trailing separator) omits.
let json =
    JsonValue.Load(System.IO.Path.Combine(__SOURCE_DIRECTORY__, "..", "data", "TwitterStream.json"))

printfn "%O" json

// Values of "lang" properties are copied through unchanged.
let anonymizedJson = (JsonAnonymizer [ "lang" ]).Anonymize json
printfn "%O" anonymizedJson

Related articles

namespace System
namespace System.Globalization
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
Multiple items
type JsonAnonymizer = new: ?propertiesToSkip: string list * ?valuesToSkip: string list -> JsonAnonymizer member Anonymize: json: JsonValue -> JsonValue

--------------------
new: ?propertiesToSkip: string list * ?valuesToSkip: string list -> JsonAnonymizer
val propertiesToSkip: string list option
val valuesToSkip: string list option
val propertiesToSkip: Set<string>
Multiple items
module Set from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> = interface IReadOnlyCollection<'T> interface IStructuralEquatable interface IComparable interface IEnumerable interface IEnumerable<'T> interface ICollection<'T> new: elements: 'T seq -> Set<'T> member Add: value: 'T -> Set<'T> member Contains: value: 'T -> bool override Equals: obj -> bool ...

--------------------
new: elements: 'T seq -> Set<'T>
val ofList: elements: 'T list -> Set<'T> (requires comparison)
val defaultArg: arg: 'T option -> defaultValue: 'T -> 'T
val valuesToSkip: Set<string>
val rng: Random
Multiple items
type Random = new: unit -> unit + 1 overload member Next: unit -> int + 2 overloads member NextBytes: buffer: byte array -> unit + 1 overload member NextDouble: unit -> float member NextInt64: unit -> int64 + 2 overloads member NextSingle: unit -> float32 static member Shared: Random
<summary>Represents a pseudo-random number generator, which is an algorithm that produces a sequence of numbers that meet certain statistical requirements for randomness.</summary>

--------------------
Random() : Random
Random(Seed: int) : Random
val digits: char array
val lowerLetters: char array
val upperLetters: char array
val getRandomChar: c: char -> char
val c: char
Multiple items
val char: value: 'T -> char (requires member op_Explicit)

--------------------
type char = Char
[<Struct>] type Char = member CompareTo: value: char -> int + 1 overload member Equals: obj: char -> bool + 1 overload member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 2 overloads static member ConvertFromUtf32: utf32: int -> string static member ConvertToUtf32: highSurrogate: char * lowSurrogate: char -> int + 1 overload static member GetNumericValue: c: char -> float + 1 overload static member GetUnicodeCategory: c: char -> UnicodeCategory + 1 overload static member IsAscii: c: char -> bool ...
<summary>Represents a character as a UTF-16 code unit.</summary>
Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
Char.IsLetter(c: char) : bool
Char.IsLetter(s: string, index: int) : bool
Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool
val randomize: str: string -> String
val str: string
Multiple items
val string: value: 'T -> string

--------------------
type string = String
Multiple items
type String = interface IEnumerable<char> interface IEnumerable interface ICloneable interface IComparable interface IComparable<string> interface IConvertible interface IEquatable<string> new: value: nativeptr<char> -> unit + 8 overloads member Clone: unit -> obj member CompareTo: value: obj -> int + 1 overload ...
<summary>Represents text as a sequence of UTF-16 code units.</summary>

--------------------
String(value: nativeptr<char>) : String
String(value: char array) : String
String(value: ReadOnlySpan<char>) : String
String(value: nativeptr<sbyte>) : String
String(c: char, count: int) : String
String(value: nativeptr<char>, startIndex: int, length: int) : String
String(value: char array, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : String
String.ToCharArray() : char array
String.ToCharArray(startIndex: int, length: int) : char array
type Array = interface ICollection interface IEnumerable interface IList interface IStructuralComparable interface IStructuralEquatable interface ICloneable member Clone: unit -> obj member CopyTo: array: Array * index: int -> unit + 1 overload member GetEnumerator: unit -> IEnumerator member GetLength: dimension: int -> int ...
<summary>Provides methods for creating, manipulating, searching, and sorting arrays, thereby serving as the base class for all arrays in the common language runtime.</summary>
val map: mapping: ('T -> 'U) -> array: 'T array -> 'U array
val isType: testType: Type -> typ: Runtime.StructuralTypes.InferedType -> bool
val testType: Type
val typ: Runtime.StructuralTypes.InferedType
Multiple items
namespace FSharp.Data.Runtime

--------------------
namespace System.Runtime
val typ: Type
val anonymize: json: JsonValue -> JsonValue
val json: JsonValue
type JsonValue = | String of string | Number of decimal | Float of float | Record of properties: (string * JsonValue) array | Array of elements: JsonValue array | Boolean of bool | Null member Request: url: string * [<Optional>] ?httpMethod: string * [<Optional>] ?headers: (string * string) seq -> HttpResponse member RequestAsync: url: string * [<Optional>] ?httpMethod: string * [<Optional>] ?headers: (string * string) seq -> Async<HttpResponse> member ToString: saveOptions: JsonSaveOptions -> string + 1 overload member WriteTo: w: TextWriter * saveOptions: JsonSaveOptions -> unit static member AsyncLoad: uri: string * [<Optional>] ?encoding: Encoding -> Async<JsonValue> static member Load: stream: Stream -> JsonValue + 2 overloads static member Parse: text: string -> JsonValue static member ParseMultiple: text: string -> JsonValue seq static member TryParse: text: string -> JsonValue option
<summary> Represents a JSON value. Large numbers that do not fit in the Decimal type are represented using the Float case, while smaller numbers are represented as decimals to avoid precision loss. </summary>
union case JsonValue.String: string -> JsonValue
val s: string
member Set.Contains: value: 'T -> bool
Multiple items
type CultureInfo = interface ICloneable interface IFormatProvider new: culture: int -> unit + 3 overloads member ClearCachedData: unit -> unit member Clone: unit -> obj member Equals: value: obj -> bool member GetConsoleFallbackUICulture: unit -> CultureInfo member GetFormat: formatType: Type -> obj member GetHashCode: unit -> int member ToString: unit -> string ...
<summary>Provides information about a specific culture (called a locale for unmanaged code development). The information includes the names for the culture, the writing system, the calendar used, the sort order of strings, and formatting for dates and numbers.</summary>

--------------------
CultureInfo(culture: int) : CultureInfo
CultureInfo(name: string) : CultureInfo
CultureInfo(culture: int, useUserOverride: bool) : CultureInfo
CultureInfo(name: string, useUserOverride: bool) : CultureInfo
property CultureInfo.InvariantCulture: CultureInfo with get
<summary>Gets the <see cref="T:System.Globalization.CultureInfo" /> object that is culture-independent (invariant).</summary>
<returns>The object that is culture-independent (invariant).</returns>
union case Option.None: Option<'T>
val typeof<'T> : Type
Multiple items
[<Struct>] type Guid = new: b: byte array -> unit + 5 overloads member CompareTo: value: Guid -> int + 1 overload member Equals: g: Guid -> bool + 1 overload member GetHashCode: unit -> int member ToByteArray: unit -> byte array member ToString: unit -> string + 2 overloads member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> -> bool member TryWriteBytes: destination: Span<byte> -> bool static member (<>) : a: Guid * b: Guid -> bool static member (=) : a: Guid * b: Guid -> bool ...
<summary>Represents a globally unique identifier (GUID).</summary>

--------------------
Guid ()
Guid(b: byte array) : Guid
Guid(b: ReadOnlySpan<byte>) : Guid
Guid(g: string) : Guid
Guid(a: int, b: int16, c: int16, d: byte array) : Guid
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid.NewGuid() : Guid
Multiple items
[<Struct>] type DateTime = new: year: int * month: int * day: int -> unit + 10 overloads member Add: value: TimeSpan -> DateTime member AddDays: value: float -> DateTime member AddHours: value: float -> DateTime member AddMilliseconds: value: float -> DateTime member AddMinutes: value: float -> DateTime member AddMonths: months: int -> DateTime member AddSeconds: value: float -> DateTime member AddTicks: value: int64 -> DateTime member AddYears: value: int -> DateTime ...
<summary>Represents an instant in time, typically expressed as a date and time of day.</summary>

--------------------
DateTime ()
   (+0 other overloads)
DateTime(ticks: int64) : DateTime
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
val prefix: string
String.StartsWith(value: string) : bool
String.StartsWith(value: char) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: CultureInfo) : bool
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
union case JsonValue.Number: decimal -> JsonValue
val d: decimal
Decimal.ToString() : string
Decimal.ToString(format: string) : string
Decimal.ToString(provider: IFormatProvider) : string
Decimal.ToString(format: string, provider: IFormatProvider) : string
Multiple items
[<Struct>] type Decimal = new: value: float -> unit + 8 overloads member CompareTo: value: decimal -> int + 1 overload member Equals: value: decimal -> bool + 2 overloads member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 3 overloads member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> * ?provider: IFormatProvider -> bool static member (%) : d1: decimal * d2: decimal -> decimal static member ( * ) : d1: decimal * d2: decimal -> decimal static member (+) : d1: decimal * d2: decimal -> decimal ...
<summary>Represents a decimal floating-point number.</summary>

--------------------
Decimal ()
Decimal(value: float) : Decimal
Decimal(value: int) : Decimal
Decimal(bits: int array) : Decimal
Decimal(value: int64) : Decimal
Decimal(bits: ReadOnlySpan<int>) : Decimal
Decimal(value: float32) : Decimal
Decimal(value: uint32) : Decimal
Decimal(value: uint64) : Decimal
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : Decimal
Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles) : decimal
Decimal.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : decimal
Decimal.Parse(s: ReadOnlySpan<char>, ?style: NumberStyles, ?provider: IFormatProvider) : decimal
union case JsonValue.Float: float -> JsonValue
val f: float
Double.ToString() : string
Double.ToString(format: string) : string
Double.ToString(provider: IFormatProvider) : string
Double.ToString(format: string, provider: IFormatProvider) : string
[<Struct>] type Double = member CompareTo: value: float -> int + 1 overload member Equals: obj: float -> bool + 1 overload member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 3 overloads member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> * ?provider: IFormatProvider -> bool static member (<) : left: float * right: float -> bool static member (<=) : left: float * right: float -> bool static member (<>) : left: float * right: float -> bool static member (=) : left: float * right: float -> bool ...
<summary>Represents a double-precision floating-point number.</summary>
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles) : float
Double.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : float
Double.Parse(s: ReadOnlySpan<char>, ?style: NumberStyles, ?provider: IFormatProvider) : float
union case JsonValue.Boolean: bool -> JsonValue
union case JsonValue.Null: JsonValue
union case JsonValue.Record: properties: (string * JsonValue) array -> JsonValue
val props: (string * JsonValue) array
val key: string
val value: JsonValue
val newValue: JsonValue
union case JsonValue.Array: elements: JsonValue array -> JsonValue
Multiple items
val array: JsonValue array

--------------------
type 'T array = 'T array
static member JsonValue.Load: reader: IO.TextReader -> JsonValue
static member JsonValue.Load: stream: IO.Stream -> JsonValue
static member JsonValue.Load: uri: string * [<Runtime.InteropServices.Optional>] ?encoding: Text.Encoding -> JsonValue
val printfn: format: Printf.TextWriterFormat<'T> -> 'T
val anonymizedJson: JsonValue

Type something to start searching.