Mapping Splunk Data

To map the data to cypienta input, use the Vector Remap Language (VRL).

The Python script below reads the VRL-transformed alerts (vrl_transformed_alerts.json) and produces one file:

  • splunk_input.json: This file is used as the input to the cypienta product. It should be uploaded to the input/ folder of the S3 bucket on which the cypienta product is set up (a minimal upload sketch is shown below).
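
Once the script has produced splunk_input.json, the upload can be done through the S3 console or programmatically. The following is a minimal boto3 sketch, not a required procedure: the bucket name is a placeholder for the bucket configured for your cypienta deployment, and valid AWS credentials are assumed to be available to boto3.

import boto3

# Placeholder bucket name: replace with the bucket on which the cypienta product is set up
bucket_name = "your-cypienta-bucket"

s3 = boto3.client("s3")

# Upload the generated file into the input/ folder of the bucket
s3.upload_file("splunk_input.json", bucket_name, "input/splunk_input.json")

The conversion script itself: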

import pandas as pd
import os
import json

file_to_read = "vrl_transformed_alerts.json"
file_to_save = "splunk_input.json"
rule_mapping_file = "alert_to_rule.json"

# Read the VRL output file
df = pd.read_json(file_to_read, lines=True)

# Keep only the columns that map to cypienta input fields
keep_cols = ["id", "name", "src", "dst", "time", "tech", "other_attributes_dict"]

# Warn about alerts that are missing required fields (id, time, src, dst)
empty_ids = df["id"].isna().any()
empty_time = df["time"].isna().any()
empty_src = df["src"].isna().any()
empty_dst = df["dst"].isna().any()

if empty_ids:
    print("Found alerts with an empty id. Those alerts will be skipped.")
if empty_time:
    print("Found alerts with an empty time. Those alerts will be skipped.")
if empty_src:
    print("Found alerts with an empty src. Those alerts will be skipped.")
if empty_dst:
    print("Found alerts with an empty dst. Those alerts will be skipped.")

df = df[~df["id"].isna()]
df = df[~df["time"].isna()]
df = df[~df["dst"].isna()]
df = df[~df["src"].isna()]

df_input = df[keep_cols]

# Write the filtered records as a JSON array (wrapped into the final structure below)
df_input.to_json(file_to_save, orient="records")

# Remove the intermediate VRL output file
os.remove(file_to_read)

# Wrap the records under a top-level "input" key to form the final cypienta input file
with open(file_to_save, "r") as f:
    alert_list = json.load(f)
with open(file_to_save, "w") as f:
    json.dump({"input": alert_list}, f)