FSharp.Data


Anonymizing JSON

Binder | Script | Notebook

This tutorial shows how to implement an anonymizer for a JSON document (represented using the JsonValue type discussed in the JSON parser article). This functionality is not directly available in the FSharp.Data package, but it can be very easily implemented by recursively walking over the JSON document.

If you want to use the JSON anonymizer in your code, you can copy the source from GitHub and just include it in your project. If you use these functions often and would like to see them in the FSharp.Data package, please submit a feature request.

DISCLAIMER: Don't use this for sensitive data, as it's just a sample.

open System
open System.Globalization
open FSharp.Data

/// Walks a JSON document and replaces its string and numeric contents with
/// random data of the same shape (digits stay digits, lower/upper-case letters
/// keep their case, every other character is kept as-is).
///
/// propertiesToSkip: names of record properties whose values are copied unchanged.
/// valuesToSkip: string values that are copied unchanged wherever they occur.
type JsonAnonymizer(?propertiesToSkip, ?valuesToSkip) =

    let propertiesToSkip = Set.ofList (defaultArg propertiesToSkip [])
    let valuesToSkip = Set.ofList (defaultArg valuesToSkip [])

    let rng = Random()

    let digits = [| '0' .. '9' |]
    let lowerLetters = [| 'a' .. 'z' |]
    let upperLetters = [| 'A' .. 'Z' |]

    /// Picks a random element of the given alphabet.
    let pick (alphabet: char[]) = alphabet.[rng.Next(alphabet.Length)]

    /// Maps a character to a random one of the same class; non-alphanumeric
    /// characters are returned unchanged.
    let getRandomChar (c: char) =
        if Char.IsDigit c then pick digits
        elif Char.IsLetter c then
            if Char.IsLower c then pick lowerLetters else pick upperLetters
        else c

    /// Randomizes every letter and digit of a string.
    let randomize (str: string) =
        String(str.ToCharArray() |> Array.map getRandomChar)

    /// Randomizes only the digits of the mantissa of a number's text.
    /// Letters are never touched and an exponent suffix ("E+30") is kept
    /// verbatim, so the result still has the syntax of a number.
    let randomizeDigits (text: string) =
        let exponentStart = text.IndexOfAny [| 'E'; 'e' |]

        let mantissa, exponent =
            if exponentStart < 0 then
                text, ""
            else
                text.Substring(0, exponentStart), text.Substring(exponentStart)

        let randomDigit c = if Char.IsDigit c then pick digits else c
        String(mantissa.ToCharArray() |> Array.map randomDigit) + exponent

    /// Replaces the first digit of the text with '0'. Used as a fallback when
    /// a randomized number does not fit into the target numeric type.
    let zeroLeadingDigit (text: string) =
        let first = text.IndexOfAny digits

        if first < 0 then
            text
        else
            text.Substring(0, first) + "0" + text.Substring(first + 1)

    /// Runs FSharp.Data's primitive type inference on a piece of text.
    let inferType (text: string) =
        Runtime.StructuralInference.inferPrimitiveType
            Runtime.StructuralInference.defaultUnitsOfMeasureProvider
            Runtime.StructuralInference.InferenceMode'.ValuesOnly
            CultureInfo.InvariantCulture
            text
            None

    /// True when the inferred type is a primitive of exactly the given type.
    let isType testType typ =
        match typ with
        | Runtime.StructuralTypes.InferedType.Primitive (primitive, _, _, _) -> primitive = testType
        | _ -> false

    /// True when the inferred type is one of the Bit0/Bit1 marker types.
    let isBit typ =
        typ |> isType typeof<Runtime.StructuralTypes.Bit0>
        || typ |> isType typeof<Runtime.StructuralTypes.Bit1>

    let urlSchemes = [ "http://"; "https://" ]

    /// Splits a leading URL scheme off a string; returns ("", s) when there is none.
    let splitUrlScheme (s: string) =
        // Ordinal comparison: the scheme is a protocol token, not natural-language text.
        match urlSchemes |> List.tryFind (fun scheme -> s.StartsWith(scheme, StringComparison.Ordinal)) with
        | Some scheme -> scheme, s.Substring(scheme.Length)
        | None -> "", s

    let rec anonymize json =
        match json with
        | JsonValue.String s when valuesToSkip.Contains s -> json
        | JsonValue.String s ->
            let typ = inferType s

            if typ |> isType typeof<Guid> then
                JsonValue.String(Guid.NewGuid().ToString())
            elif isBit typ || typ |> isType typeof<DateTime> then
                // Bit-like and date strings are kept unchanged.
                json
            else
                // Keep the URL scheme readable and randomize the rest.
                let scheme, rest = splitUrlScheme s
                JsonValue.String(scheme + randomize rest)
        | JsonValue.Number d ->
            // Format and parse with the invariant culture so the text matches the
            // culture used for inference and round-trips on any machine.
            let text = d.ToString(CultureInfo.InvariantCulture)

            if isBit (inferType text) then
                json
            else
                let randomized = randomizeDigits text

                match Decimal.TryParse(randomized, NumberStyles.Number, CultureInfo.InvariantCulture) with
                | true, value -> JsonValue.Number value
                | _ ->
                    // The random digits did not fit into a decimal; shrink the
                    // magnitude instead of throwing.
                    Decimal.Parse(zeroLeadingDigit randomized, NumberStyles.Number, CultureInfo.InvariantCulture)
                    |> JsonValue.Number
        | JsonValue.Float f when Double.IsNaN f || Double.IsInfinity f ->
            // There are no digits to randomize in NaN or infinities.
            json
        | JsonValue.Float f ->
            let randomized =
                f.ToString(CultureInfo.InvariantCulture)
                |> randomizeDigits

            match Double.TryParse(randomized, NumberStyles.Float, CultureInfo.InvariantCulture) with
            | true, value when not (Double.IsInfinity value) -> JsonValue.Float value
            | _ ->
                // The randomized mantissa overflowed the double range; shrink the
                // magnitude instead of throwing or producing infinity.
                Double.Parse(zeroLeadingDigit randomized, NumberStyles.Float, CultureInfo.InvariantCulture)
                |> JsonValue.Float
        | JsonValue.Boolean _
        | JsonValue.Null -> json
        | JsonValue.Record props ->
            props
            |> Array.map (fun (key, value) ->
                let newValue =
                    if propertiesToSkip.Contains key then
                        value
                    else
                        anonymize value

                key, newValue)
            |> JsonValue.Record
        | JsonValue.Array array -> array |> Array.map anonymize |> JsonValue.Array

    /// Returns an anonymized copy of the given JSON value; the input is not modified.
    member _.Anonymize json = anonymize json

// Load the sample document that ships next to this script.
// NOTE(review): no path separator is inserted between __SOURCE_DIRECTORY__ and
// the relative part below - confirm the path resolves as intended on your platform.
let json =
    let samplePath =
        __SOURCE_DIRECTORY__
        + "../../data/TwitterStream.json"

    JsonValue.Load(samplePath)

// Print the original document.
json |> printfn "%O"

// Anonymize everything except the values of "lang" properties, then print the result.
let anonymizedJson =
    let anonymizer = JsonAnonymizer([ "lang" ])
    anonymizer.Anonymize json

anonymizedJson |> printfn "%O"

Related articles

namespace System
namespace System.Globalization
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
Multiple items
type JsonAnonymizer = new: ?propertiesToSkip: string list * ?valuesToSkip: string list -> JsonAnonymizer member Anonymize: json: JsonValue -> JsonValue

--------------------
new: ?propertiesToSkip: string list * ?valuesToSkip: string list -> JsonAnonymizer
val propertiesToSkip: string list option
val valuesToSkip: string list option
val propertiesToSkip: Set<string>
Multiple items
module Set from Microsoft.FSharp.Collections
<summary>Contains operations for working with values of type <see cref="T:Microsoft.FSharp.Collections.Set`1" />.</summary>

--------------------
type Set<'T (requires comparison)> = interface IReadOnlyCollection<'T> interface IComparable interface IEnumerable interface IEnumerable<'T> interface ICollection<'T> new: elements: seq<'T> -> Set<'T> member Add: value: 'T -> Set<'T> member Contains: value: 'T -> bool override Equals: obj -> bool member IsProperSubsetOf: otherSet: Set<'T> -> bool ...
<summary>Immutable sets based on binary trees, where elements are ordered by F# generic comparison. By default comparison is the F# structural comparison function or uses implementations of the IComparable interface on element values.</summary>
<remarks>See the <see cref="T:Microsoft.FSharp.Collections.SetModule" /> module for further operations on sets. All members of this class are thread-safe and may be used concurrently from multiple threads.</remarks>


--------------------
new: elements: seq<'T> -> Set<'T>
val ofList: elements: 'T list -> Set<'T> (requires comparison)
<summary>Builds a set that contains the same elements as the given list.</summary>
<param name="elements">The input list.</param>
<returns>A set containing the elements from the input list.</returns>
<example id="set-oflist"><code lang="fsharp"> let set = Set.ofList [1, 2, 3] printfn $"The set is {set} and type is {set.GetType().Name}" </code> The sample evaluates to the following output: <c>The set is set [(1, 2, 3)] and type is "FSharpSet`1"</c></example>
val defaultArg: arg: 'T option -> defaultValue: 'T -> 'T
<summary>Used to specify a default value for an optional argument in the implementation of a function</summary>
<param name="arg">An option representing the argument.</param>
<param name="defaultValue">The default value of the argument.</param>
<returns>The argument value. If it is None, the defaultValue is returned.</returns>
<example id="defaultArg-example"><code lang="fsharp"> type Vector(x: double, y: double, ?z: double) = let z = defaultArg z 0.0 member this.X = x member this.Y = y member this.Z = z let v1 = Vector(1.0, 2.0) v1.Z // Evaluates to 0. let v2 = Vector(1.0, 2.0, 3.0) v2.Z // Evaluates to 3.0 </code></example>
val valuesToSkip: Set<string>
val rng: Random
Multiple items
type Random = new: unit -> unit + 1 overload member Next: unit -> int + 2 overloads member NextBytes: buffer: byte[] -> unit + 1 overload member NextDouble: unit -> float member NextInt64: unit -> int64 + 2 overloads member NextSingle: unit -> float32 static member Shared: Random
<summary>Represents a pseudo-random number generator, which is an algorithm that produces a sequence of numbers that meet certain statistical requirements for randomness.</summary>

--------------------
Random() : Random
Random(Seed: int) : Random
val digits: char[]
val lowerLetters: char[]
val upperLetters: char[]
val getRandomChar: (char -> char)
val c: char
Multiple items
val char: value: 'T -> char (requires member op_Explicit)
<summary>Converts the argument to character. Numeric inputs are converted according to the UTF-16 encoding for characters. String inputs must be exactly one character long. For other input types the operation requires an appropriate static conversion method on the input type.</summary>
<param name="value">The input value.</param>
<returns>The converted char.</returns>
<example id="char-example"><code lang="fsharp"></code></example>


--------------------
[<Struct>] type char = Char
<summary>An abbreviation for the CLI type <see cref="T:System.Char" />.</summary>
<category>Basic Types</category>
[<Struct>] type Char = member CompareTo: value: char -> int + 1 overload member Equals: obj: char -> bool + 1 overload member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 2 overloads static member ConvertFromUtf32: utf32: int -> string static member ConvertToUtf32: highSurrogate: char * lowSurrogate: char -> int + 1 overload static member GetNumericValue: c: char -> float + 1 overload static member GetUnicodeCategory: c: char -> UnicodeCategory + 1 overload static member IsAscii: c: char -> bool ...
<summary>Represents a character as a UTF-16 code unit.</summary>
Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
Char.IsLetter(c: char) : bool
Char.IsLetter(s: string, index: int) : bool
Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool
val randomize: (string -> String)
val str: string
Multiple items
val string: value: 'T -> string
<summary>Converts the argument to a string using <c>ToString</c>.</summary>
<remarks>For standard integer and floating point values and any type that implements <c>IFormattable</c>, the <c>ToString</c> conversion uses <c>CultureInfo.InvariantCulture</c>. </remarks>
<param name="value">The input value.</param>
<returns>The converted string.</returns>
<example id="string-example"><code lang="fsharp"></code></example>


--------------------
type string = String
<summary>An abbreviation for the CLI type <see cref="T:System.String" />.</summary>
<category>Basic Types</category>
Multiple items
type String = interface IEnumerable<char> interface IEnumerable interface ICloneable interface IComparable interface IComparable<string> interface IConvertible interface IEquatable<string> new: value: nativeptr<char> -> unit + 8 overloads member Clone: unit -> obj member CompareTo: value: obj -> int + 1 overload ...
<summary>Represents text as a sequence of UTF-16 code units.</summary>

--------------------
String(value: nativeptr<char>) : String
String(value: char[]) : String
String(value: ReadOnlySpan<char>) : String
String(value: nativeptr<sbyte>) : String
String(c: char, count: int) : String
String(value: nativeptr<char>, startIndex: int, length: int) : String
String(value: char[], startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : String
String.ToCharArray() : char[]
String.ToCharArray(startIndex: int, length: int) : char[]
type Array = interface ICollection interface IEnumerable interface IList interface IStructuralComparable interface IStructuralEquatable interface ICloneable member Clone: unit -> obj member CopyTo: array: Array * index: int -> unit + 1 overload member GetEnumerator: unit -> IEnumerator member GetLength: dimension: int -> int ...
<summary>Provides methods for creating, manipulating, searching, and sorting arrays, thereby serving as the base class for all arrays in the common language runtime.</summary>
val map: mapping: ('T -> 'U) -> array: 'T[] -> 'U[]
<summary>Builds a new array whose elements are the results of applying the given function to each of the elements of the array.</summary>
<param name="mapping">The function to transform elements of the array.</param>
<param name="array">The input array.</param>
<returns>The array of transformed elements.</returns>
<exception cref="T:System.ArgumentNullException">Thrown when the input array is null.</exception>
<example id="map-1"><code lang="fsharp"> let inputs = [| "a"; "bbb"; "cc" |] inputs |&gt; Array.map (fun x -&gt; x.Length) </code> Evaluates to <c>[| 1; 3; 2 |]</c></example>
val isType: (Type -> Runtime.StructuralTypes.InferedType -> bool)
val testType: Type
val typ: Runtime.StructuralTypes.InferedType
Multiple items
namespace FSharp.Data.Runtime

--------------------
namespace System.Runtime
val typ: Type
val anonymize: (JsonValue -> JsonValue)
val json: JsonValue
type JsonValue = | String of string | Number of decimal | Float of float | Record of properties: (string * JsonValue)[] | Array of elements: JsonValue[] | Boolean of bool | Null member Request: url: string * ?httpMethod: string * ?headers: seq<string * string> -> HttpResponse member RequestAsync: url: string * ?httpMethod: string * ?headers: seq<string * string> -> Async<HttpResponse> member ToString: saveOptions: JsonSaveOptions -> string + 1 overload member WriteTo: w: TextWriter * saveOptions: JsonSaveOptions -> unit static member AsyncLoad: uri: string * ?encoding: Encoding -> Async<JsonValue> static member Load: stream: Stream -> JsonValue + 2 overloads static member Parse: text: string -> JsonValue static member ParseMultiple: text: string -> seq<JsonValue> static member TryParse: text: string -> JsonValue option
<summary> Represents a JSON value. Large numbers that do not fit in the Decimal type are represented using the Float case, while smaller numbers are represented as decimals to avoid precision loss. </summary>
union case JsonValue.String: string -> JsonValue
val s: string
member Set.Contains: value: 'T -> bool
Multiple items
type CultureInfo = interface ICloneable interface IFormatProvider new: culture: int -> unit + 3 overloads member ClearCachedData: unit -> unit member Clone: unit -> obj member Equals: value: obj -> bool member GetConsoleFallbackUICulture: unit -> CultureInfo member GetFormat: formatType: Type -> obj member GetHashCode: unit -> int member ToString: unit -> string ...
<summary>Provides information about a specific culture (called a locale for unmanaged code development). The information includes the names for the culture, the writing system, the calendar used, the sort order of strings, and formatting for dates and numbers.</summary>

--------------------
CultureInfo(culture: int) : CultureInfo
CultureInfo(name: string) : CultureInfo
CultureInfo(culture: int, useUserOverride: bool) : CultureInfo
CultureInfo(name: string, useUserOverride: bool) : CultureInfo
property CultureInfo.InvariantCulture: CultureInfo with get
<summary>Gets the <see cref="T:System.Globalization.CultureInfo" /> object that is culture-independent (invariant).</summary>
<returns>The object that is culture-independent (invariant).</returns>
union case Option.None: Option<'T>
<summary>The representation of "No value"</summary>
val typeof<'T> : Type
<summary>Generate a System.Type runtime representation of a static type.</summary>
<example id="typeof-example"><code lang="fsharp"> let t = typeof&lt;int&gt; // Gets the System.Type t.FullName // Evaluates to "System.Int32" </code></example>
Multiple items
[<Struct>] type Guid = new: b: byte[] -> unit + 5 overloads member CompareTo: value: Guid -> int + 1 overload member Equals: g: Guid -> bool + 1 overload member GetHashCode: unit -> int member ToByteArray: unit -> byte[] member ToString: unit -> string + 2 overloads member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> -> bool member TryWriteBytes: destination: Span<byte> -> bool static member (<>) : a: Guid * b: Guid -> bool static member (=) : a: Guid * b: Guid -> bool ...
<summary>Represents a globally unique identifier (GUID).</summary>

--------------------
Guid ()
Guid(b: byte[]) : Guid
Guid(b: ReadOnlySpan<byte>) : Guid
Guid(g: string) : Guid
Guid(a: int, b: int16, c: int16, d: byte[]) : Guid
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid.NewGuid() : Guid
Multiple items
[<Struct>] type DateTime = new: year: int * month: int * day: int -> unit + 10 overloads member Add: value: TimeSpan -> DateTime member AddDays: value: float -> DateTime member AddHours: value: float -> DateTime member AddMilliseconds: value: float -> DateTime member AddMinutes: value: float -> DateTime member AddMonths: months: int -> DateTime member AddSeconds: value: float -> DateTime member AddTicks: value: int64 -> DateTime member AddYears: value: int -> DateTime ...
<summary>Represents an instant in time, typically expressed as a date and time of day.</summary>

--------------------
DateTime ()
   (+0 other overloads)
DateTime(ticks: int64) : DateTime
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
val prefix: string
String.StartsWith(value: string) : bool
String.StartsWith(value: char) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: CultureInfo) : bool
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
union case JsonValue.Number: decimal -> JsonValue
val d: decimal
Decimal.ToString() : string
Decimal.ToString(format: string) : string
Decimal.ToString(provider: IFormatProvider) : string
Decimal.ToString(format: string, provider: IFormatProvider) : string
Multiple items
[<Struct>] type Decimal = new: value: float -> unit + 8 overloads member CompareTo: value: decimal -> int + 1 overload member Equals: value: decimal -> bool + 2 overloads member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 3 overloads member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> * ?provider: IFormatProvider -> bool static member (%) : d1: decimal * d2: decimal -> decimal static member ( * ) : d1: decimal * d2: decimal -> decimal static member (+) : d1: decimal * d2: decimal -> decimal ...
<summary>Represents a decimal floating-point number.</summary>

--------------------
Decimal ()
Decimal(value: float) : Decimal
Decimal(value: int) : Decimal
Decimal(bits: int[]) : Decimal
Decimal(value: int64) : Decimal
Decimal(bits: ReadOnlySpan<int>) : Decimal
Decimal(value: float32) : Decimal
Decimal(value: uint32) : Decimal
Decimal(value: uint64) : Decimal
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : Decimal
Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles) : decimal
Decimal.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : decimal
Decimal.Parse(s: ReadOnlySpan<char>, ?style: NumberStyles, ?provider: IFormatProvider) : decimal
union case JsonValue.Float: float -> JsonValue
val f: float
Double.ToString() : string
Double.ToString(format: string) : string
Double.ToString(provider: IFormatProvider) : string
Double.ToString(format: string, provider: IFormatProvider) : string
[<Struct>] type Double = member CompareTo: value: float -> int + 1 overload member Equals: obj: float -> bool + 1 overload member GetHashCode: unit -> int member GetTypeCode: unit -> TypeCode member ToString: unit -> string + 3 overloads member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> * ?provider: IFormatProvider -> bool static member (<) : left: float * right: float -> bool static member (<=) : left: float * right: float -> bool static member (<>) : left: float * right: float -> bool static member (=) : left: float * right: float -> bool ...
<summary>Represents a double-precision floating-point number.</summary>
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles) : float
Double.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : float
Double.Parse(s: ReadOnlySpan<char>, ?style: NumberStyles, ?provider: IFormatProvider) : float
union case JsonValue.Boolean: bool -> JsonValue
union case JsonValue.Null: JsonValue
union case JsonValue.Record: properties: (string * JsonValue)[] -> JsonValue
val props: (string * JsonValue)[]
val key: string
val value: JsonValue
val newValue: JsonValue
union case JsonValue.Array: elements: JsonValue[] -> JsonValue
Multiple items
val array: JsonValue[]

--------------------
type 'T array = 'T[]
<summary>Single dimensional, zero-based arrays, written <c>int[]</c>, <c>string[]</c> etc.</summary>
<remarks>Use the values in the <see cref="T:Microsoft.FSharp.Collections.ArrayModule" /> module to manipulate values of this type, or the notation <c>arr.[x]</c> to get/set array values.</remarks>
<category>Basic Types</category>
static member JsonValue.Load: reader: IO.TextReader -> JsonValue
static member JsonValue.Load: stream: IO.Stream -> JsonValue
static member JsonValue.Load: uri: string * ?encoding: Text.Encoding -> JsonValue
val printfn: format: Printf.TextWriterFormat<'T> -> 'T
<summary>Print to <c>stdout</c> using the given format, and add a newline.</summary>
<param name="format">The formatter.</param>
<returns>The formatted result.</returns>
<example>See <c>Printf.printfn</c> (link: <see cref="M:Microsoft.FSharp.Core.PrintfModule.PrintFormatLine``1" />) for examples.</example>
val anonymizedJson: JsonValue