go-xdfx2csv/main.go

98 lines
1.9 KiB
Go
Raw Normal View History

2022-05-02 01:48:35 +03:00
package main
import (
"bufio"
"fmt"
"log"
"os"
"strings"
"sync"
"time"
)
// Dictionary metadata. These package-level variables are written by fullFile
// while it reads the beginning of the dictionary file and are printed by it
// afterwards.

// full_name holds the dictionary title taken from the line that carries the
// <full_name> tag.
var full_name string

// lang_from holds the source-language value taken from the line that carries
// the lang_from= attribute.
var lang_from string

// lang_to holds the target-language value taken from the line that carries
// the lang_to= attribute.
var lang_to string
// FIRST_LINES is an untyped integer constant equal to 5.
// NOTE(review): nothing in the visible code references it; presumably it was
// meant as the number of leading header lines to skip — confirm before use
// or removal.
const FIRST_LINES = 5
// main opens the XDXF dictionary "dict.xdxf" from the current working
// directory and hands the open file to fullFile for processing. If the file
// cannot be opened the program logs the error and exits.
func main() {
	// os.Open is the read-only form of os.OpenFile; permission bits are only
	// relevant when a file is being created, so none are passed here.
	f, err := os.Open("dict.xdxf")
	if err != nil {
		// log.Fatalf terminates the process, so nothing after it would run.
		log.Fatalf("open file error: %v", err)
	}
	defer f.Close()

	fullFile(f)
}
// fullFile reads an XDXF dictionary from file line by line.
//
// It first scans the header: the text of the <full_name> element is stored in
// full_name and the values of the lang_from / lang_to attributes are stored in
// lang_from and lang_to. The header ends at the first line containing
// "<ar><k>". The collected metadata is then printed, after which every
// article line (including that first one) is echoed to stdout. The headword
// collected for an article is printed when the next article starts or when
// the input ends.
//
// Any read error reported by the scanner (including a line longer than the
// configured maximum) terminates the program via log.Fatalf.
func fullFile(file *os.File) {
	// Dictionary articles can be far longer than bufio.Scanner's default
	// 64 KiB token limit, so allow lines of up to 16 MiB.
	const (
		initialLineBuf = 64 * 1024
		maxLineSize    = 16 * 1024 * 1024
	)
	sc := bufio.NewScanner(file)
	sc.Buffer(make([]byte, 0, initialLineBuf), maxLineSize)

	// Skip the header lines, keeping only the dictionary name and languages.
	first := ""
	for sc.Scan() {
		line := sc.Text()
		if name, ok := xdxfTagText(line, "full_name"); ok {
			full_name = name
		}
		// Look the attributes up by name: their position on the line and the
		// presence of other attributes must not matter.
		if from, ok := xdxfAttr(line, "lang_from"); ok {
			lang_from = from
		}
		if to, ok := xdxfAttr(line, "lang_to"); ok {
			lang_to = to
		}
		if strings.Contains(line, "<ar><k>") {
			first = line
			break
		}
	}

	fmt.Printf("Dict name: %s.\n", full_name)
	fmt.Printf("From %s to %s. \n", lang_from, lang_to)

	// Fields of the article currently being read.
	// NOTE: translation and transcription are not extracted yet; they are
	// kept so the printed record already has its final three-column shape.
	var (
		word          string
		translation   string
		transcription string
	)

	// flush prints the article collected so far, if there is one.
	flush := func() {
		if word != "" {
			fmt.Println(word, translation, transcription)
		}
	}

	// handle processes one line of the article section.
	handle := func(line string) {
		if strings.Contains(line, "<ar>") {
			// A new article starts: emit the previous one before clearing.
			flush()
			word, translation, transcription = "", "", ""
		}
		if k, ok := xdxfTagText(line, "k"); ok {
			word = k
		}
		fmt.Println(line)
	}

	// The line that ended the header loop is itself the first article, so it
	// goes through the same path as every following line.
	if first != "" {
		handle(first)
	}
	for sc.Scan() {
		handle(sc.Text())
	}
	flush()

	// Scan returns false both at EOF and on failure; Err tells them apart and
	// is only meaningful after scanning has stopped.
	if err := sc.Err(); err != nil {
		log.Fatalf("scan file error: %v", err)
	}
}

// xdxfTagText returns the whitespace-trimmed text between <tag> and </tag> on
// line. If the closing tag is missing, the rest of the line is used. The
// boolean reports whether the opening tag was found.
func xdxfTagText(line, tag string) (string, bool) {
	_, rest, found := strings.Cut(line, "<"+tag+">")
	if !found {
		return "", false
	}
	text, _, _ := strings.Cut(rest, "</"+tag+">")
	return strings.TrimSpace(text), true
}

// xdxfAttr returns the value of the attribute name on line, accepting
// name="value", name='value' and unquoted name=value forms. The boolean
// reports whether the attribute was found.
func xdxfAttr(line, name string) (string, bool) {
	_, rest, found := strings.Cut(line, name+"=")
	if !found {
		return "", false
	}
	if rest != "" && (rest[0] == '"' || rest[0] == '\'') {
		// Quoted value: everything up to the matching quote character.
		val, _, _ := strings.Cut(rest[1:], rest[:1])
		return val, true
	}
	// Unquoted value: ends at whitespace or at the end of the tag.
	if i := strings.IndexAny(rest, " \t>"); i >= 0 {
		rest = rest[:i]
	}
	return rest, true
}
// processChunk is a placeholder for per-chunk processing. Its body is empty:
// chunk, both pools and the start/end times are accepted but ignored, and
// nothing is returned.
func processChunk(chunk []byte, linesPool, stringPool *sync.Pool, start, end time.Time) {
	// Intentionally empty: not implemented yet.
}
// readFile is a placeholder whose body is empty. According to the original
// note it is meant to read the file in chunks and pass each chunk on to
// another function for processing; see
// https://medium.com/swlh/processing-16gb-file-in-seconds-go-lang-3982c235dfa2
func readFile() {
	// Intentionally empty: not implemented yet.
}