"""Count phrase frequency in prompt books.

Reads one or more prompt files, splits every line on commas, normalizes
each phrase (lowercased, surrounding whitespace removed) and prints a JSON
object mapping each phrase to its number of occurrences, most frequent
first.

Provenance: api/scripts/parse-prompts.py, introduced by commit
de8105ec6017c2da417ca55e1de606474c77fef3 (Sean Sube, 2023-07-05),
"feat(scripts): add script to parse prompt books".
"""

from argparse import ArgumentParser, Namespace
from collections import Counter
from json import dumps
from sys import argv
from typing import Iterable, List, Optional


def parse_args(args: List[str]) -> Namespace:
    """Parse the command-line arguments.

    Args:
        args: Argument strings, without the program name.

    Returns:
        A namespace whose ``file`` attribute is the list of one or more
        prompt file paths given on the command line.
    """
    parser = ArgumentParser(
        prog="onnx-web prompt parser",
        description="count phrase frequency in prompt books",
    )
    parser.add_argument("file", nargs="+", help="prompt files to parse")
    return parser.parse_args(args)


def count_phrases(lines: Iterable[str]) -> Counter:
    """Count the comma-separated phrases found in *lines*.

    Each phrase is lowercased and stripped of surrounding whitespace before
    being counted.

    Args:
        lines: Lines of prompt text; trailing newlines are allowed.

    Returns:
        A ``Counter`` mapping each normalized phrase to its occurrence count.
    """
    counts: Counter = Counter()
    for line in lines:
        for part in line.split(","):
            phrase = part.lower().strip()
            # Blank lines and trailing or doubled commas yield empty
            # fragments; they are not phrases and must not be counted.
            if phrase:
                counts[phrase] += 1
    return counts


def main(args: Optional[List[str]] = None) -> None:
    """Print phrase frequencies for the given prompt files as JSON.

    Args:
        args: Argument strings to parse instead of the process arguments.
            When omitted, ``sys.argv[1:]`` is used.

    Raises:
        OSError: If one of the files cannot be opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    options = parse_args(argv[1:] if args is None else args)

    counts: Counter = Counter()
    for path in options.file:
        # Decode explicitly as UTF-8 so results do not depend on the
        # platform's locale, and stream the file line by line instead of
        # buffering every line of every file in memory.
        with open(path, "r", encoding="utf-8") as f:
            counts.update(count_phrases(f))

    # dict() keeps the most_common() ordering, so the JSON object lists the
    # most frequent phrases first.
    print(dumps(dict(counts.most_common())))


if __name__ == "__main__":
    main()