converter xdxf to csv
This commit is contained in:
parent
8cf21b5e62
commit
07cda3e10b
123
main.go
123
main.go
|
@ -2,37 +2,47 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"encoding/csv"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type Words struct {
|
||||||
|
word string
|
||||||
|
translation string
|
||||||
|
transcription string
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
full_name string
|
full_name string
|
||||||
lang_from string
|
lang_from string
|
||||||
lang_to string
|
lang_to string
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
FIRST_LINES = 5
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
f, err := os.OpenFile("dict.xdxf", os.O_RDONLY, os.ModePerm)
|
fcsv, err := os.Create("./dict.csv")
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
defer fcsv.Close()
|
||||||
|
|
||||||
|
writer := csv.NewWriter(fcsv)
|
||||||
|
writer.Comma = '|'
|
||||||
|
|
||||||
|
fxdfx, err := os.OpenFile("dict.xdxf", os.O_RDONLY, os.ModePerm)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer fxdfx.Close()
|
||||||
|
|
||||||
fullFile(f)
|
fullFile(fxdfx, writer)
|
||||||
}
|
}
|
||||||
|
|
||||||
func fullFile(file *os.File) {
|
func fullFile(fxdfx *os.File, fcsv *csv.Writer) {
|
||||||
sc := bufio.NewScanner(file)
|
sc := bufio.NewScanner(fxdfx)
|
||||||
/*for sc.Scan() {
|
/*for sc.Scan() {
|
||||||
fmt.Println(sc.Text())
|
fmt.Println(sc.Text())
|
||||||
}*/
|
}*/
|
||||||
|
@ -42,7 +52,7 @@ func fullFile(file *os.File) {
|
||||||
}
|
}
|
||||||
|
|
||||||
//skip first lines with some unnecessary data and save full name
|
//skip first lines with some unnecessary data and save full name
|
||||||
first:=""
|
first := ""
|
||||||
for sc.Scan() {
|
for sc.Scan() {
|
||||||
line := sc.Text()
|
line := sc.Text()
|
||||||
if strings.Contains(line, "<full_name>") {
|
if strings.Contains(line, "<full_name>") {
|
||||||
|
@ -57,41 +67,90 @@ func fullFile(file *os.File) {
|
||||||
lang_to = strings.Split(tmp[2], " ")[0]
|
lang_to = strings.Split(tmp[2], " ")[0]
|
||||||
}
|
}
|
||||||
if strings.Contains(line, "<ar><k>") {
|
if strings.Contains(line, "<ar><k>") {
|
||||||
first= line
|
first = line
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("Dict name: %s.\n", full_name)
|
fmt.Printf("Dict name: %s.\n", full_name)
|
||||||
fmt.Printf("From %s to %s. \n", lang_from, lang_to)
|
fmt.Printf("From %s to %s. \n", lang_from, lang_to)
|
||||||
fmt.Println(first)
|
//fmt.Println(first)
|
||||||
// read file line by line
|
|
||||||
var (
|
|
||||||
word string
|
|
||||||
translation string
|
|
||||||
transcription string
|
|
||||||
)
|
|
||||||
|
|
||||||
for sc.Scan() {
|
// read file line by line
|
||||||
line := sc.Text()
|
neww := new(Words)
|
||||||
if strings.Contains(line, "<ar>") {
|
for {
|
||||||
word = ""
|
first = strings.TrimSpace(first)
|
||||||
translation = ""
|
if first == "" {
|
||||||
transcription = ""
|
if sc.Scan() {
|
||||||
fmt.Println(word, translation, transcription)
|
first += sc.Text()
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.Contains(first, "<ar>") {
|
||||||
|
neww.word = ""
|
||||||
|
neww.translation = ""
|
||||||
|
neww.transcription = ""
|
||||||
|
//fmt.Println(neww)
|
||||||
|
}
|
||||||
|
if strings.Contains(first, "<k>") {
|
||||||
|
_, neww.word, _ = strings.Cut(first, "<k>")
|
||||||
|
if strings.Contains(neww.word, "</k>") {
|
||||||
|
neww.word, first, _ = strings.Cut(neww.word, "</k>")
|
||||||
|
first = strings.TrimSpace(first)
|
||||||
|
if first == "" {
|
||||||
|
if sc.Scan() {
|
||||||
|
first += sc.Text()
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if strings.Contains(first, "<tr>") {
|
||||||
|
_, neww.transcription, _ = strings.Cut(first, "<tr>")
|
||||||
|
if strings.Contains(neww.transcription, "</tr>") {
|
||||||
|
neww.transcription, first, _ = strings.Cut(neww.transcription, "</tr>")
|
||||||
|
first = strings.TrimSpace(first)
|
||||||
|
if first == "" {
|
||||||
|
if sc.Scan() {
|
||||||
|
first += sc.Text()
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if strings.Contains(first, "</ar>") {
|
||||||
|
neww.translation, first, _ = strings.Cut(first, "</ar>")
|
||||||
|
first = strings.TrimSpace(first)
|
||||||
|
first = strings.TrimSpace(first)
|
||||||
|
if first == "" {
|
||||||
|
if sc.Scan() {
|
||||||
|
//fmt.Printf("Word: %s. Transcription: %s. Translation: %s.\n", neww.word, neww.transcription, neww.translation)
|
||||||
|
err := fcsv.Write([]string{neww.word, neww.transcription, neww.translation})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
first += sc.Text()
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if sc.Scan() {
|
||||||
|
//fmt.Printf("Word: %s. Transcription: %s. Translation: %s.\n", neww.word, neww.transcription, neww.translation)
|
||||||
|
first += sc.Text()
|
||||||
|
} else {
|
||||||
|
break
|
||||||
}
|
}
|
||||||
if strings.Contains(line, "<k>") {
|
|
||||||
_, word, _ = strings.Cut(line, "<k>")
|
|
||||||
}
|
}
|
||||||
fmt.Println(sc.Text())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func processChunk(chunk []byte, linesPool *sync.Pool, stringPool *sync.Pool, start time.Time, end time.Time) {
|
/*func processChunk(chunk []byte, linesPool *sync.Pool, stringPool *sync.Pool, start time.Time, end time.Time) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//readFile opens the file in chunks and passes each chunk to another func for processing. https://medium.com/swlh/processing-16gb-file-in-seconds-go-lang-3982c235dfa2
|
//readFile opens the file in chunks and passes each chunk to another func for processing. https://medium.com/swlh/processing-16gb-file-in-seconds-go-lang-3982c235dfa2
|
||||||
func readFile() {
|
func readFile() {
|
||||||
|
|
||||||
}
|
}*/
|
||||||
|
|
Loading…
Reference in New Issue