Go Text Splitter

DimmuBurgor · 26.07.2023

This program splits a source file into pieces of at most a predetermined number of characters, which is handy for complying with character limits. Simple, nothing special =)

Код:

package main

import (
    "bufio"
    "fmt"
    "os"
    "path/filepath"
    "strings"
)

// maxCharactersPerFile is the threshold main compares its running count
// against when deciding whether to start a new output file.
const maxCharactersPerFile = 3890

// main prompts for a file path on standard input and copies that file, line by
// line, into numbered output files created by createOutputFile.
//
// A new output file is started whenever adding the next line (plus its
// terminating newline) would push the current file past maxCharactersPerFile
// characters. Lines are never split: a single line longer than the limit is
// written whole to a file of its own. Any error is reported on standard output
// and ends the run.
func main() {
	fmt.Print("Enter the file path: ")
	prompt := bufio.NewScanner(os.Stdin)
	prompt.Scan()
	if err := prompt.Err(); err != nil {
		fmt.Println("Error reading input:", err)
		return
	}
	filePath := prompt.Text()

	file, err := os.Open(filePath)
	if err != nil {
		fmt.Println("Error opening file:", err)
		return
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	// Allow lines of up to about 1 MiB; the default 64 KiB cap makes the
	// scanner stop with an error on long (e.g. minified) lines.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), 1<<20)

	fileCounter := 1
	outFile, err := createOutputFile(filePath, fileCounter)
	if err != nil {
		fmt.Println("Error creating output file:", err)
		return
	}
	// Deferred through a closure so that the file that is current when main
	// returns gets closed; a plain `defer outFile.Close()` would bind the
	// first file only.
	defer func() {
		if outFile != nil {
			outFile.Close()
		}
	}()

	currentCount := 0

	for scanner.Scan() {
		line := scanner.Text()
		// Count characters (runes), not bytes, and include the newline that
		// is written after every line.
		lineLength := len([]rune(line)) + 1

		// Only rotate when the current file already holds something;
		// otherwise an oversized line would leave an empty file behind.
		if currentCount > 0 && currentCount+lineLength > maxCharactersPerFile {
			if err := outFile.Close(); err != nil {
				fmt.Println("Error writing to output file:", err)
				return
			}

			fileCounter++
			outFile, err = createOutputFile(filePath, fileCounter)
			if err != nil {
				fmt.Println("Error creating output file:", err)
				return
			}

			currentCount = 0
		}

		if _, err := outFile.WriteString(line + "\n"); err != nil {
			fmt.Println("Error writing to output file:", err)
			return
		}
		currentCount += lineLength
	}

	if err := scanner.Err(); err != nil {
		fmt.Println("Error reading input:", err)
		return
	}

	// Close the last file explicitly so a failed flush is not reported as
	// success; clearing outFile stops the deferred cleanup closing it again.
	err = outFile.Close()
	outFile = nil
	if err != nil {
		fmt.Println("Error writing to output file:", err)
		return
	}

	fmt.Printf("Files created successfully: %d files\n", fileCounter)
}

// createOutputFile creates (or truncates) the output file for piece number
// fileCounter of filePath. The piece name is filePath with the counter inserted
// just before the extension, e.g. "src.go" and 2 give "src2.go"; a path without
// an extension simply gets the counter appended.
//
// It returns the opened file, or a nil file and the error from os.Create.
func createOutputFile(filePath string, fileCounter int) (*os.File, error) {
	ext := filepath.Ext(filePath)
	stem := strings.TrimSuffix(filePath, ext)
	return os.Create(fmt.Sprintf("%s%d%s", stem, fileCounter, ext))
}

ckat_soft · 26.07.2023

DimmuBurgor сказал(а):

This program works to split source code into a pre-determined amount of characters handy for character limit compliance etc simple nothing special =)

Код:

package main

import (
    "bufio"
    "fmt"
    "os"
    "path/filepath"
    "strings"
)

const maxCharactersPerFile = 3890

func main() {
fmt. Print("Enter the file path: ")
scanner := bufio. NewScanner(os. Stdin)
scanner. Scan()
filePath := scanner. Text()

file, err := os. Open(filePath)
    if err != nil {
fmt. Println("Error opening file:", err)
        return
    }
defer file. Close()

scanner = bufio. NewScanner(file)

    fileCounter := 1

    outFile, err := createOutputFile(filePath, fileCounter)
    if err != nil {
fmt. Println("Error creating output file:", err)
        return
    }
    defer outFile.Close()

    currentCount := 0

for scanner. Scan() {
line := scanner. Text()
        lineLength := len(line)

        if currentCount+lineLength > maxCharactersPerFile {
            outFile.Close()

            fileCounter++
            outFile, err = createOutputFile(filePath, fileCounter)
            if err != nil {
fmt. Println("Error creating output file:", err)
                return
            }

            currentCount = 0
        }

        _, err = outFile.WriteString(line + "\n")
        if err != nil {
fmt. Println("Error writing to output file:", err)
            return
        }

    }

if err := scanner. Err(); err != nil {
fmt. Println("Error reading input:", err)
        return
    }

fmt. Printf("Files created successfully: %d files\n", fileCounter)
}

func createOutputFile(filePath string, fileCounter int) (*os. File, error) {
fileExt := filepath. Ext(filePath)

newFileName := strings. TrimSuffix(filePath, fileExt) + fmt. Sprintf("%d", fileCounter) + fileExt

outFile, err := os. Create(newFileName)
    if err != nil {
        return nil, err
    }

    return outFile, nil
}


[/CODE]
[/QUOTE]
какая польза от этой программы?

Dread Pirate Roberts · 26.07.2023

might be handy for *ndows, but *nix users have split already:

Код:

bash-4.4$ split --help
Usage: split [OPTION]... [FILE [PREFIX]]
Output pieces of FILE to PREFIXaa, PREFIXab, ...;
default size is 1000 lines, and default PREFIX is 'x'.

With no FILE, or when FILE is -, read standard input.

Mandatory arguments to long options are mandatory for short options too.
  -a, --suffix-length=N   generate suffixes of length N (default 2)
      --additional-suffix=SUFFIX  append an additional SUFFIX to file names
  -b, --bytes=SIZE        put SIZE bytes per output file
  -C, --line-bytes=SIZE   put at most SIZE bytes of records per output file
  -d                      use numeric suffixes starting at 0, not alphabetic
      --numeric-suffixes[=FROM]  same as -d, but allow setting the start value
  -x                      use hex suffixes starting at 0, not alphabetic
      --hex-suffixes[=FROM]  same as -x, but allow setting the start value
  -e, --elide-empty-files  do not generate empty output files with '-n'
      --filter=COMMAND    write to shell COMMAND; file name is $FILE
  -l, --lines=NUMBER      put NUMBER lines/records per output file
  -n, --number=CHUNKS     generate CHUNKS output files; see explanation below
  -t, --separator=SEP     use SEP instead of newline as the record separator;
                            '\0' (zero) specifies the NUL character
  -u, --unbuffered        immediately copy input to output with '-n r/...'
      --verbose           print a diagnostic just before each
                            output file is opened
      --help     display this help and exit
      --version  output version information and exit

The SIZE argument is an integer and optional unit (example: 10K is 10*1024).
Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000).
Binary prefixes can be used, too: KiB=K, MiB=M, and so on.

CHUNKS may be:
  N       split into N files based on size of input
  K/N     output Kth of N to stdout
  l/N     split into N files without splitting lines/records
  l/K/N   output Kth of N to stdout without splitting lines/records
  r/N     like 'l' but use round robin distribution
  r/K/N   likewise but only output Kth of N to stdout

GNU coreutils online help: <https://www.gnu.org/software/coreutils/>
Full documentation <https://www.gnu.org/software/coreutils/split>
or available locally via: info '(coreutils) split invocation'

ckat_soft · 26.07.2023

Dread Pirate Roberts сказал(а):

might be handy for *ndows, but *nix users have split already:

Код:

bash-4.4$ split --help
Usage: split [OPTION]... [FILE [PREFIX]]
Output pieces of FILE to PREFIXaa, PREFIXab, ...;
default size is 1000 lines, and default PREFIX is 'x'.

With no FILE, or when FILE is -, read standard input.

Mandatory arguments to long options are mandatory for short options too.
  -a, --suffix-length=N   generate suffixes of length N (default 2)
      --additional-suffix=SUFFIX  append an additional SUFFIX to file names
  -b, --bytes=SIZE        put SIZE bytes per output file
  -C, --line-bytes=SIZE   put at most SIZE bytes of records per output file
  -d                      use numeric suffixes starting at 0, not alphabetic
      --numeric-suffixes[=FROM]  same as -d, but allow setting the start value
  -x использовать шестнадцатеричные суффиксы, начинающиеся с 0, а не буквенные
      --hex-suffixes[=FROM] то же, что и -x, но позволяет установить начальное значение
  -e, --elide-empty-files не генерируют пустые выходные файлы с '-n'
      --filter=КОМАНДА записать в оболочку КОМАНДА; имя файла $FILE
  -l, --lines=ЧИСЛО поместить ЧИСЛО строк/записей в выходной файл
  -n, --number=CHUNKS генерировать выходные файлы CHUNKS; см. объяснение ниже
  -t, --separator=SEP использовать SEP вместо новой строки в качестве разделителя записей;
                            '\0' (ноль) указывает символ NUL
  -u, --unbuffered немедленно скопировать ввод в вывод с помощью '-nr/...'
      --verbose вывести диагностику непосредственно перед каждым
                            выходной файл открыт
      --help показать эту справку и выйти
      --version вывести информацию о версии и выйти

Аргумент SIZE представляет собой целое число и необязательную единицу измерения (пример: 10 КБ равно 10*1024).
Единицы: K,M,G,T,P,E,Z,Y (степень 1024) или KB,MB,... (степень 1000).
Также можно использовать двоичные префиксы: KiB=K, MiB=M и так далее.

ЧАНКИ могут быть:
  N разбит на N файлов в зависимости от размера ввода
  K/N вывод Kth из N на стандартный вывод
  l/N разделить на N файлов без разделения строк/записей
  l/K/N вывод Kth из N на стандартный вывод без разделения строк/записей
  r/N как 'l', но используйте круговое распределение
  r/K/N аналогично, но выводит только Kth из N на стандартный вывод

Онлайн-справка GNU coreutils: <https://www.gnu.org/software/coreutils/>
Полная документация <https://www.gnu.org/software/coreutils/split>
или доступно локально через: info '(coreutils) split invocation'

ну под винду уже много разных утилит есть

DimmuBurgor · 26.07.2023

Dread Pirate Roberts сказал(а):

might be handy for *ndows, but *nix users have split already:

Код:

bash-4.4$ split --help
Usage: split [OPTION]... [FILE [PREFIX]]
Output pieces of FILE to PREFIXaa, PREFIXab, ...;
default size is 1000 lines, and default PREFIX is 'x'.

With no FILE, or when FILE is -, read standard input.

Mandatory arguments to long options are mandatory for short options too.
  -a, --suffix-length=N   generate suffixes of length N (default 2)
      --additional-suffix=SUFFIX  append an additional SUFFIX to file names
  -b, --bytes=SIZE        put SIZE bytes per output file
  -C, --line-bytes=SIZE   put at most SIZE bytes of records per output file
  -d                      use numeric suffixes starting at 0, not alphabetic
      --numeric-suffixes[=FROM]  same as -d, but allow setting the start value
  -x                      use hex suffixes starting at 0, not alphabetic
      --hex-suffixes[=FROM]  same as -x, but allow setting the start value
  -e, --elide-empty-files  do not generate empty output files with '-n'
      --filter=COMMAND    write to shell COMMAND; file name is $FILE
  -l, --lines=NUMBER      put NUMBER lines/records per output file
  -n, --number=CHUNKS     generate CHUNKS output files; see explanation below
  -t, --separator=SEP     use SEP instead of newline as the record separator;
                            '\0' (zero) specifies the NUL character
  -u, --unbuffered        immediately copy input to output with '-n r/...'
      --verbose           print a diagnostic just before each
                            output file is opened
      --help     display this help and exit
      --version  output version information and exit

The SIZE argument is an integer and optional unit (example: 10K is 10*1024).
Units are K,M,G,T,P,E,Z,Y (powers of 1024) or KB,MB,... (powers of 1000).
Binary prefixes can be used, too: KiB=K, MiB=M, and so on.

CHUNKS may be:
  N       split into N files based on size of input
  K/N     output Kth of N to stdout
  l/N     split into N files without splitting lines/records
  l/K/N   output Kth of N to stdout without splitting lines/records
  r/N     like 'l' but use round robin distribution
  r/K/N   likewise but only output Kth of N to stdout

GNU coreutils online help: <https://www.gnu.org/software/coreutils/>
Full documentation <https://www.gnu.org/software/coreutils/split>
or available locally via: info '(coreutils) split invocation'

Yep, it's for Windows, but you could just as well use the method you described with MSYS2 or WSL, or a script block.

DimmuBurgor · 27.07.2023

ckat_soft, the main thing was to learn to make it myself, since my skill and experience writing code have a long way to go.
Thank you for sharing resources for other tools, though, and for your input =)

Go Text Splitter

DimmuBurgor

CPU register

ckat_soft

(L3) cache

Dread Pirate Roberts

Премиум

ckat_soft

(L3) cache

DimmuBurgor

CPU register

DimmuBurgor

CPU register