
This tutorial shows how to implement an anonymizer for a JSON document (represented using
the JsonValue type discussed in the JSON parser article).
This functionality is not directly available in the FSharp.Data package, but it can
be very easily implemented by recursively walking over the JSON document.
If you want to use the JSON anonymizer in your code, you can copy the
source from GitHub and just include it in your project. If you use these
functions often and would like to see them in the FSharp.Data package, please submit
a feature request.
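DISCLAIMER: Don't use this for sensitive data. It is just a sample: property names, dates, booleans and 0/1 flags are left untouched, and the random values come from System.Random.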
open System
open System.Globalization
open FSharp.Data
/// Produces a copy of a JSON document in which letters and digits are replaced
/// by random ones while the overall shape of the document is preserved.
///
/// What is kept as-is: record property names, booleans, nulls, punctuation and
/// other non-alphanumeric characters, a leading "http://" or "https://", strings
/// inferred as dates, and values inferred as the 0/1 bit flags.
/// Strings inferred as GUIDs are replaced by a freshly generated GUID.
///
/// propertiesToSkip: names of record properties whose values (including any
///     nested content) are copied through without being anonymized.
/// valuesToSkip: string values that are copied through unchanged wherever
///     they occur.
type JsonAnonymizer(?propertiesToSkip, ?valuesToSkip) =

    let propertiesToSkip = Set.ofList (defaultArg propertiesToSkip [])
    let valuesToSkip = Set.ofList (defaultArg valuesToSkip [])

    // System.Random is not a cryptographic generator; this is sample-grade anonymization.
    let rng = Random()
    let digits = [| '0' .. '9' |]
    let lowerLetters = [| 'a' .. 'z' |]
    let upperLetters = [| 'A' .. 'Z' |]

    // Picks one element of the given alphabet at random (bound derived from the
    // array itself rather than a separately maintained constant).
    let pick (alphabet: char[]) = alphabet.[rng.Next(alphabet.Length)]

    // Maps a digit to a random digit and a letter to a random ASCII letter.
    // Lower-case letters stay lower-case; every other letter becomes upper-case.
    // Any other character is returned unchanged.
    let getRandomChar (c: char) =
        if Char.IsDigit c then
            pick digits
        elif Char.IsLetter c then
            if Char.IsLower c then pick lowerLetters else pick upperLetters
        else
            c

    // Randomizes every character of the string; the length is preserved.
    let randomize (str: string) =
        String(str.ToCharArray() |> Array.map getRandomChar)

    // True when the inferred type is a primitive of exactly the given runtime type.
    let isType testType typ =
        match typ with
        | Runtime.StructuralTypes.InferedType.Primitive (typ, _, _, _) -> typ = testType
        | _ -> false

    // True when the inferred type is one of the 0/1 bit-flag types.
    let isBit typ =
        typ |> isType typeof<Runtime.StructuralTypes.Bit0>
        || typ |> isType typeof<Runtime.StructuralTypes.Bit1>

    // Runs FSharp.Data's primitive type inference on a piece of text
    // (values-only mode, invariant culture).
    let inferType (text: string) =
        Runtime.StructuralInference.inferPrimitiveType
            Runtime.StructuralInference.defaultUnitsOfMeasureProvider
            Runtime.StructuralInference.InferenceMode'.ValuesOnly
            CultureInfo.InvariantCulture
            text
            None

    // Returns the URL scheme prefix that must survive anonymization, or "" if none.
    // Ordinal comparison: the prefix is a protocol token, not linguistic text.
    let urlPrefix (s: string) =
        if s.StartsWith("http://", StringComparison.Ordinal) then "http://"
        elif s.StartsWith("https://", StringComparison.Ordinal) then "https://"
        else ""

    // Randomizes the digits of a decimal. The text round-trips through the
    // invariant culture so that it matches the culture used for inference.
    // If the randomized digits no longer fit in a decimal, the original value
    // is kept instead of throwing.
    let randomizeDecimal (d: decimal) =
        let text = d.ToString(CultureInfo.InvariantCulture)

        match Decimal.TryParse(randomize text, NumberStyles.Number, CultureInfo.InvariantCulture) with
        | true, v -> v
        | _ -> d

    // Randomizes the mantissa digits of a float. The exponent part ("E+30") is
    // kept verbatim: randomizing it would turn the 'E' into an arbitrary letter
    // and make the text unparsable. NaN and infinities have no digits to
    // randomize and are returned unchanged, as is any value whose randomized
    // form would overflow.
    let randomizeFloat (f: float) =
        if Double.IsNaN f || Double.IsInfinity f then
            f
        else
            let text = f.ToString("R", CultureInfo.InvariantCulture)

            let mantissa, exponent =
                match text.IndexOf('E') with
                | -1 -> text, ""
                | i -> text.Substring(0, i), text.Substring(i)

            let candidate = randomize mantissa + exponent

            match Double.TryParse(candidate, NumberStyles.Float, CultureInfo.InvariantCulture) with
            | true, v when not (Double.IsInfinity v) -> v
            | _ -> f

    // Recursively walks the document and anonymizes every leaf value.
    let rec anonymize json =
        match json with
        | JsonValue.String s when valuesToSkip.Contains s -> json
        | JsonValue.String s ->
            let typ = inferType s

            let anonymized =
                if typ |> isType typeof<Guid> then
                    Guid.NewGuid().ToString()
                elif isBit typ || typ |> isType typeof<DateTime> then
                    // Flags and dates are deliberately left untouched.
                    s
                else
                    let prefix = urlPrefix s
                    prefix + randomize (s.Substring(prefix.Length))

            JsonValue.String anonymized
        | JsonValue.Number d ->
            if isBit (inferType (d.ToString(CultureInfo.InvariantCulture))) then
                json
            else
                JsonValue.Number(randomizeDecimal d)
        | JsonValue.Float f -> JsonValue.Float(randomizeFloat f)
        | JsonValue.Boolean _
        | JsonValue.Null -> json
        | JsonValue.Record props ->
            props
            |> Array.map (fun (key, value) ->
                let newValue =
                    if propertiesToSkip.Contains key then
                        value
                    else
                        anonymize value

                key, newValue)
            |> JsonValue.Record
        | JsonValue.Array array -> array |> Array.map anonymize |> JsonValue.Array

    /// Returns an anonymized copy of the given JSON value; the input is not modified.
    member _.Anonymize json = anonymize json
// Load the sample document from a location built from this script's directory.
// NOTE(review): no path separator is inserted between __SOURCE_DIRECTORY__ and
// the relative part — confirm the resulting path resolves to the intended file.
let json =
    let samplePath = __SOURCE_DIRECTORY__ + "../../data/TwitterStream.json"
    JsonValue.Load(samplePath)

// Show the document before anonymization.
json |> printfn "%O"

// Anonymize everything except the values of "lang" properties.
let anonymizedJson =
    let anonymizer = JsonAnonymizer(propertiesToSkip = [ "lang" ])
    anonymizer.Anonymize json

// Show the anonymized result.
anonymizedJson |> printfn "%O"
namespace System
namespace System.Globalization
Multiple items
namespace FSharp
--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data
--------------------
namespace Microsoft.FSharp.Data
Multiple items
type JsonAnonymizer =
new: ?propertiesToSkip: string list * ?valuesToSkip: string list -> JsonAnonymizer
member Anonymize: json: JsonValue -> JsonValue
--------------------
new: ?propertiesToSkip: string list * ?valuesToSkip: string list -> JsonAnonymizer
val propertiesToSkip: string list option
val valuesToSkip: string list option
val propertiesToSkip: Set<string>
Multiple items
module Set
from Microsoft.FSharp.Collections
<summary>Contains operations for working with values of type <see cref="T:Microsoft.FSharp.Collections.Set`1" />.</summary>
--------------------
type Set<'T (requires comparison)> =
interface IReadOnlyCollection<'T>
interface IComparable
interface IEnumerable
interface IEnumerable<'T>
interface ICollection<'T>
new: elements: seq<'T> -> Set<'T>
member Add: value: 'T -> Set<'T>
member Contains: value: 'T -> bool
override Equals: obj -> bool
member IsProperSubsetOf: otherSet: Set<'T> -> bool
...
<summary>Immutable sets based on binary trees, where elements are ordered by F# generic comparison. By default
comparison is the F# structural comparison function or uses implementations of the IComparable interface on element values.</summary>
<remarks>See the <see cref="T:Microsoft.FSharp.Collections.SetModule" /> module for further operations on sets.
All members of this class are thread-safe and may be used concurrently from multiple threads.</remarks>
--------------------
new: elements: seq<'T> -> Set<'T>
val ofList: elements: 'T list -> Set<'T> (requires comparison)
<summary>Builds a set that contains the same elements as the given list.</summary>
<param name="elements">The input list.</param>
<returns>A set containing the elements from the input list.</returns>
<example id="set-oflist"><code lang="fsharp">
let set = Set.ofList [1, 2, 3]
printfn $"The set is {set} and type is {set.GetType().Name}"
</code>
The sample evaluates to the following output: <c>The set is set [(1, 2, 3)] and type is "FSharpSet`1"</c></example>
val defaultArg: arg: 'T option -> defaultValue: 'T -> 'T
<summary>Used to specify a default value for an optional argument in the implementation of a function</summary>
<param name="arg">An option representing the argument.</param>
<param name="defaultValue">The default value of the argument.</param>
<returns>The argument value. If it is None, the defaultValue is returned.</returns>
<example id="defaultArg-example"><code lang="fsharp">
type Vector(x: double, y: double, ?z: double) =
let z = defaultArg z 0.0
member this.X = x
member this.Y = y
member this.Z = z
let v1 = Vector(1.0, 2.0)
v1.Z // Evaluates to 0.
let v2 = Vector(1.0, 2.0, 3.0)
v2.Z // Evaluates to 3.0
</code></example>
val valuesToSkip: Set<string>
val rng: Random
Multiple items
type Random =
new: unit -> unit + 1 overload
member Next: unit -> int + 2 overloads
member NextBytes: buffer: byte[] -> unit + 1 overload
member NextDouble: unit -> float
member NextInt64: unit -> int64 + 2 overloads
member NextSingle: unit -> float32
static member Shared: Random
<summary>Represents a pseudo-random number generator, which is an algorithm that produces a sequence of numbers that meet certain statistical requirements for randomness.</summary>
--------------------
Random() : Random
Random(Seed: int) : Random
val digits: char[]
val lowerLetters: char[]
val upperLetters: char[]
val getRandomChar: (char -> char)
val c: char
Multiple items
val char: value: 'T -> char (requires member op_Explicit)
<summary>Converts the argument to character. Numeric inputs are converted according to the UTF-16
encoding for characters. String inputs must be exactly one character long. For other
input types the operation requires an appropriate static conversion method on the input type.</summary>
<param name="value">The input value.</param>
<returns>The converted char.</returns>
<example id="char-example"><code lang="fsharp"></code></example>
--------------------
[<Struct>]
type char = Char
<summary>An abbreviation for the CLI type <see cref="T:System.Char" />.</summary>
<category>Basic Types</category>
[<Struct>]
type Char =
member CompareTo: value: char -> int + 1 overload
member Equals: obj: char -> bool + 1 overload
member GetHashCode: unit -> int
member GetTypeCode: unit -> TypeCode
member ToString: unit -> string + 2 overloads
static member ConvertFromUtf32: utf32: int -> string
static member ConvertToUtf32: highSurrogate: char * lowSurrogate: char -> int + 1 overload
static member GetNumericValue: c: char -> float + 1 overload
static member GetUnicodeCategory: c: char -> UnicodeCategory + 1 overload
static member IsAscii: c: char -> bool
...
<summary>Represents a character as a UTF-16 code unit.</summary>
Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
Char.IsLetter(c: char) : bool
Char.IsLetter(s: string, index: int) : bool
Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool
val randomize: (string -> String)
val str: string
Multiple items
val string: value: 'T -> string
<summary>Converts the argument to a string using <c>ToString</c>.</summary>
<remarks>For standard integer and floating point values, and for any type that implements <c>IFormattable</c>, the <c>ToString</c> conversion uses <c>CultureInfo.InvariantCulture</c>.</remarks>
<param name="value">The input value.</param>
<returns>The converted string.</returns>
<example id="string-example"><code lang="fsharp"></code></example>
--------------------
type string = String
<summary>An abbreviation for the CLI type <see cref="T:System.String" />.</summary>
<category>Basic Types</category>
Multiple items
type String =
interface IEnumerable<char>
interface IEnumerable
interface ICloneable
interface IComparable
interface IComparable<string>
interface IConvertible
interface IEquatable<string>
new: value: nativeptr<char> -> unit + 8 overloads
member Clone: unit -> obj
member CompareTo: value: obj -> int + 1 overload
...
<summary>Represents text as a sequence of UTF-16 code units.</summary>
--------------------
String(value: nativeptr<char>) : String
String(value: char[]) : String
String(value: ReadOnlySpan<char>) : String
String(value: nativeptr<sbyte>) : String
String(c: char, count: int) : String
String(value: nativeptr<char>, startIndex: int, length: int) : String
String(value: char[], startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : String
String.ToCharArray() : char[]
String.ToCharArray(startIndex: int, length: int) : char[]
type Array =
interface ICollection
interface IEnumerable
interface IList
interface IStructuralComparable
interface IStructuralEquatable
interface ICloneable
member Clone: unit -> obj
member CopyTo: array: Array * index: int -> unit + 1 overload
member GetEnumerator: unit -> IEnumerator
member GetLength: dimension: int -> int
...
<summary>Provides methods for creating, manipulating, searching, and sorting arrays, thereby serving as the base class for all arrays in the common language runtime.</summary>
val map: mapping: ('T -> 'U) -> array: 'T[] -> 'U[]
<summary>Builds a new array whose elements are the results of applying the given function
to each of the elements of the array.</summary>
<param name="mapping">The function to transform elements of the array.</param>
<param name="array">The input array.</param>
<returns>The array of transformed elements.</returns>
<exception cref="T:System.ArgumentNullException">Thrown when the input array is null.</exception>
<example id="map-1"><code lang="fsharp">
let inputs = [| "a"; "bbb"; "cc" |]
inputs |> Array.map (fun x -> x.Length)
</code>
Evaluates to <c>[| 1; 3; 2 |]</c></example>
val isType: (Type -> Runtime.StructuralTypes.InferedType -> bool)
val testType: Type
val typ: Runtime.StructuralTypes.InferedType
Multiple items
namespace FSharp.Data.Runtime
--------------------
namespace System.Runtime
val typ: Type
val anonymize: (JsonValue -> JsonValue)
val json: JsonValue
type JsonValue =
| String of string
| Number of decimal
| Float of float
| Record of properties: (string * JsonValue)[]
| Array of elements: JsonValue[]
| Boolean of bool
| Null
member Request: url: string * ?httpMethod: string * ?headers: seq<string * string> -> HttpResponse
member RequestAsync: url: string * ?httpMethod: string * ?headers: seq<string * string> -> Async<HttpResponse>
member ToString: saveOptions: JsonSaveOptions -> string + 1 overload
member WriteTo: w: TextWriter * saveOptions: JsonSaveOptions -> unit
static member AsyncLoad: uri: string * ?encoding: Encoding -> Async<JsonValue>
static member Load: stream: Stream -> JsonValue + 2 overloads
static member Parse: text: string -> JsonValue
static member ParseMultiple: text: string -> seq<JsonValue>
static member TryParse: text: string -> JsonValue option
<summary>
Represents a JSON value. Large numbers that do not fit in the
Decimal type are represented using the Float case, while
smaller numbers are represented as decimals to avoid precision loss.
</summary>
union case JsonValue.String: string -> JsonValue
val s: string
member Set.Contains: value: 'T -> bool
Multiple items
type CultureInfo =
interface ICloneable
interface IFormatProvider
new: culture: int -> unit + 3 overloads
member ClearCachedData: unit -> unit
member Clone: unit -> obj
member Equals: value: obj -> bool
member GetConsoleFallbackUICulture: unit -> CultureInfo
member GetFormat: formatType: Type -> obj
member GetHashCode: unit -> int
member ToString: unit -> string
...
<summary>Provides information about a specific culture (called a locale for unmanaged code development). The information includes the names for the culture, the writing system, the calendar used, the sort order of strings, and formatting for dates and numbers.</summary>
--------------------
CultureInfo(culture: int) : CultureInfo
CultureInfo(name: string) : CultureInfo
CultureInfo(culture: int, useUserOverride: bool) : CultureInfo
CultureInfo(name: string, useUserOverride: bool) : CultureInfo
property CultureInfo.InvariantCulture: CultureInfo with get
<summary>Gets the <see cref="T:System.Globalization.CultureInfo" /> object that is culture-independent (invariant).</summary>
<returns>The object that is culture-independent (invariant).</returns>
union case Option.None: Option<'T>
<summary>The representation of "No value"</summary>
val typeof<'T> : Type
<summary>Generate a System.Type runtime representation of a static type.</summary>
<example id="typeof-example"><code lang="fsharp">
let t = typeof<int> // Gets the System.Type
t.FullName // Evaluates to "System.Int32"
</code></example>
Multiple items
[<Struct>]
type Guid =
new: b: byte[] -> unit + 5 overloads
member CompareTo: value: Guid -> int + 1 overload
member Equals: g: Guid -> bool + 1 overload
member GetHashCode: unit -> int
member ToByteArray: unit -> byte[]
member ToString: unit -> string + 2 overloads
member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> -> bool
member TryWriteBytes: destination: Span<byte> -> bool
static member (<>) : a: Guid * b: Guid -> bool
static member (=) : a: Guid * b: Guid -> bool
...
<summary>Represents a globally unique identifier (GUID).</summary>
--------------------
Guid ()
Guid(b: byte[]) : Guid
Guid(b: ReadOnlySpan<byte>) : Guid
Guid(g: string) : Guid
Guid(a: int, b: int16, c: int16, d: byte[]) : Guid
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid.NewGuid() : Guid
Multiple items
[<Struct>]
type DateTime =
new: year: int * month: int * day: int -> unit + 10 overloads
member Add: value: TimeSpan -> DateTime
member AddDays: value: float -> DateTime
member AddHours: value: float -> DateTime
member AddMilliseconds: value: float -> DateTime
member AddMinutes: value: float -> DateTime
member AddMonths: months: int -> DateTime
member AddSeconds: value: float -> DateTime
member AddTicks: value: int64 -> DateTime
member AddYears: value: int -> DateTime
...
<summary>Represents an instant in time, typically expressed as a date and time of day.</summary>
--------------------
DateTime ()
(+0 other overloads)
DateTime(ticks: int64) : DateTime
(+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : DateTime
(+0 other overloads)
DateTime(year: int, month: int, day: int) : DateTime
(+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Calendar) : DateTime
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : DateTime
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : DateTime
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Calendar) : DateTime
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : DateTime
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : DateTime
(+0 other overloads)
val prefix: string
String.StartsWith(value: string) : bool
String.StartsWith(value: char) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: CultureInfo) : bool
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
union case JsonValue.Number: decimal -> JsonValue
val d: decimal
Decimal.ToString() : string
Decimal.ToString(format: string) : string
Decimal.ToString(provider: IFormatProvider) : string
Decimal.ToString(format: string, provider: IFormatProvider) : string
Multiple items
[<Struct>]
type Decimal =
new: value: float -> unit + 8 overloads
member CompareTo: value: decimal -> int + 1 overload
member Equals: value: decimal -> bool + 2 overloads
member GetHashCode: unit -> int
member GetTypeCode: unit -> TypeCode
member ToString: unit -> string + 3 overloads
member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> * ?provider: IFormatProvider -> bool
static member (%) : d1: decimal * d2: decimal -> decimal
static member ( * ) : d1: decimal * d2: decimal -> decimal
static member (+) : d1: decimal * d2: decimal -> decimal
...
<summary>Represents a decimal floating-point number.</summary>
--------------------
Decimal ()
Decimal(value: float) : Decimal
Decimal(value: int) : Decimal
Decimal(bits: int[]) : Decimal
Decimal(value: int64) : Decimal
Decimal(bits: ReadOnlySpan<int>) : Decimal
Decimal(value: float32) : Decimal
Decimal(value: uint32) : Decimal
Decimal(value: uint64) : Decimal
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : Decimal
Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles) : decimal
Decimal.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : decimal
Decimal.Parse(s: ReadOnlySpan<char>, ?style: NumberStyles, ?provider: IFormatProvider) : decimal
union case JsonValue.Float: float -> JsonValue
val f: float
Double.ToString() : string
Double.ToString(format: string) : string
Double.ToString(provider: IFormatProvider) : string
Double.ToString(format: string, provider: IFormatProvider) : string
[<Struct>]
type Double =
member CompareTo: value: float -> int + 1 overload
member Equals: obj: float -> bool + 1 overload
member GetHashCode: unit -> int
member GetTypeCode: unit -> TypeCode
member ToString: unit -> string + 3 overloads
member TryFormat: destination: Span<char> * charsWritten: byref<int> * ?format: ReadOnlySpan<char> * ?provider: IFormatProvider -> bool
static member (<) : left: float * right: float -> bool
static member (<=) : left: float * right: float -> bool
static member (<>) : left: float * right: float -> bool
static member (=) : left: float * right: float -> bool
...
<summary>Represents a double-precision floating-point number.</summary>
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles) : float
Double.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : float
Double.Parse(s: ReadOnlySpan<char>, ?style: NumberStyles, ?provider: IFormatProvider) : float
union case JsonValue.Boolean: bool -> JsonValue
union case JsonValue.Null: JsonValue
union case JsonValue.Record: properties: (string * JsonValue)[] -> JsonValue
val props: (string * JsonValue)[]
val key: string
val value: JsonValue
val newValue: JsonValue
union case JsonValue.Array: elements: JsonValue[] -> JsonValue
Multiple items
val array: JsonValue[]
--------------------
type 'T array = 'T[]
<summary>Single dimensional, zero-based arrays, written <c>int[]</c>, <c>string[]</c> etc.</summary>
<remarks>Use the values in the <see cref="T:Microsoft.FSharp.Collections.ArrayModule" /> module to manipulate values
of this type, or the notation <c>arr.[x]</c> to get/set array
values.</remarks>
<category>Basic Types</category>
static member JsonValue.Load: reader: IO.TextReader -> JsonValue
static member JsonValue.Load: stream: IO.Stream -> JsonValue
static member JsonValue.Load: uri: string * ?encoding: Text.Encoding -> JsonValue
val printfn: format: Printf.TextWriterFormat<'T> -> 'T
<summary>Print to <c>stdout</c> using the given format, and add a newline.</summary>
<param name="format">The formatter.</param>
<returns>The formatted result.</returns>
<example>See <c>Printf.printfn</c> (link: <see cref="M:Microsoft.FSharp.Core.PrintfModule.PrintFormatLine``1" />) for examples.</example>
val anonymizedJson: JsonValue