您的位置 首页 golang

2020-05-22 golang 处理 PDF

func rmWaterMark(inputPath, outputPath string) error {
f, err := os.Open(inputPath)
if err != nil {
return err
}

defer f.Close()//common.SetLogger(common.ConsoleLogger{LogLevel: 5})pdfWriter := pdf.NewPdfWriter()pdfReader, err := pdf.NewPdfReaderLazy(f)if err != nil {    return err}fileExt := filepath.Ext(f.Name())fileName := strings.TrimSuffix(filepath.Base(f.Name()), fileExt)println(fileName)isEncrypted, err := pdfReader.IsEncrypted()if err != nil {    return err}if isEncrypted {    _, err = pdfReader.Decrypt([]byte(""))    if err != nil {        return err    }}numPages, err := pdfReader.GetNumPages()if err != nil {    return err}//pdfWriter := pdf.NewPdfWriter()for i := 0; i < numPages; i++ {    pdfPage, _ := pdfReader.GetPage(i + 1)    fmt.Println("Start prcess page " + strconv.Itoa(i+1))    if pdfPage.Contents != nil {        typeOf := reflect.TypeOf(pdfPage.Contents)        typeOfEl := typeOf.Elem()        if typeOfEl == reflect.TypeOf(core.PdfObjectArray{}) {            arrays := pdfPage.Contents.(*core.PdfObjectArray)            if arrays.Len() > 0 {                //  fterx := arrays.Get(0).(*core.PdfObjectReference).Resolve().(core.PdfObject).(*core.PdfObjectStream)                for _, norContent := range arrays.Elements() {                    fterValue := norContent.(*core.PdfObjectReference).Resolve().(core.PdfObject).(*core.PdfObjectStream)                    filterTj(fterValue, false)                }            }        }        if typeOfEl == reflect.TypeOf(core.PdfObjectReference{}) {            filter := pdfPage.Contents.(*core.PdfObjectReference).Resolve().(*core.PdfObjectStream)            filterTj(filter, false)        }    }    pdfWriter.AddPage(pdfPage)}outFull := outputPath + "rmwaterMarker" + fileExtfmt.Println(outFull)fWrite, err := os.Create(outFull)if err != nil {    return err}defer fWrite.Close()err = pdfWriter.Write(fWrite)return nil

}

func filterTj(content *core.PdfObjectStream, left bool) {

fla, _ := core.DecodeStream(content)fmt.Println(string(fla))fmt.Println("proc==================================")cStreamParser := contentstream.NewContentStreamParser(string(fla))parsed, _ := cStreamParser.Parse()needreset := falsestartrm := falsealltype := ""for id, i2 := range * parsed {    if !strings.Contains(alltype, i2.Operand) {        alltype = alltype + " " + i2.Operand    }    //cCKkSQmcmlwreTfjBDCrigsTjMJdDoG    if i2.Params != nil && !(strings.Contains("BDC k ri gs f EMC q W n  Q BT Tf ET K w d S G Do Td", i2.Operand)) {        //        //if i2.Operand == "TJ" { //处理文字        //  pfoa := i2.Params[0].(*core.PdfObjectArray)        //  for _, ix := range pfoa.Elements() {        //      switch vv := ix.(type) {        //      case *core.PdfObjectString:        //          decoded := vv.Decoded()        //          if strings.ContainsAny(decoded, "FãFþGe") || strings.ContainsAny(decoded, " �   -   ") {        //              pfoa.Clear()        //              needreset = true        //          }        //        //      }        //  }        //        //} else        if i2.Operand == "Tm" { //处理文字            startrm = false            position := len(i2.Params) - 1            x := i2.Params[position]            y := i2.Params[position-1]            typex := reflect.TypeOf(x)            typey := reflect.TypeOf(y)            if typex.Elem() == reflect.TypeOf(core.PdfObjectFloat(0)) && (typey.Elem() == reflect.TypeOf(core.PdfObjectFloat(0))) {                if (* x.(*core.PdfObjectFloat) < 16 && * y.(*core.PdfObjectFloat) > 250 && *y.(*core.PdfObjectFloat) < 608) || validatekaKa(x.(*core.PdfObjectFloat), y.(*core.PdfObjectFloat)) {                    (*parsed)[id] = &contentstream.ContentStreamOperation{}                    needreset = true                    startrm = true                    fmt.Println("=============")                    fmt.Println(i2)                }            }        } else if i2.Operand == "cm" { //处理圆圈            startrm = false 
           position := len(i2.Params) - 1            x := i2.Params[position]            y := i2.Params[position-1]            typex := reflect.TypeOf(x)            typey := reflect.TypeOf(y)            if typex.Elem() == reflect.TypeOf(core.PdfObjectFloat(0)) && (typey.Elem() == reflect.TypeOf(core.PdfObjectFloat(0))) {                if validatekother(x.(*core.PdfObjectFloat), y.(*core.PdfObjectFloat)) {                    (*parsed)[id] = &contentstream.ContentStreamOperation{}                    needreset = true                    startrm = true                    fmt.Println("=============")                    fmt.Println(i2)                }            }        } else if i2.Operand == "l" || i2.Operand == "c" {            if startrm {                (*parsed)[id] = &contentstream.ContentStreamOperation{}                startrm = true                needreset = true                fmt.Println("=============")                fmt.Println(i2)            }            //fmt.Println(strconv.Itoa(i) )        } else if i2.Operand == "h" || i2.Operand == "re" || i2.Operand == "TJ" || i2.Operand == "Tj" {            if startrm {                (*parsed)[id] = &contentstream.ContentStreamOperation{}                fmt.Println("=============")                fmt.Println(i2)            }        }    }}fmt.Println(alltype)if needreset {    fmt.Println(string(parsed.Bytes()))    content.Stream, _ = core.NewFlateEncoder().EncodeBytes(parsed.Bytes())}//fmt.Println(xk)

}

// validatekaKa reports whether *y is below 23.5 and *x is above 12.
//
// filterTj calls it with the last operand of a "Tm" operation as y and the
// second-to-last as x. An operation such as
// "6 0 0 6 24.5576019 30.1304016 Tm" (x = 24.55..., y = 30.13...) yields
// false and is therefore left in place.
//
// Both pointers must be non-nil.
func validatekaKa(y, x *core.PdfObjectFloat) bool {
	return *y < 23.5 && *x > 12
}
// validatekother reports whether the pair (*y, *x) falls in one of the
// regions treated as watermark graphics: y strictly between 370 and 378.5,
// or y below 18, combined with x strictly between 568 and 608.
//
// filterTj calls it with the last operand of a "cm" operation as y and the
// second-to-last as x.
//
// The parameters are pointers, matching validatekaKa and the way the caller
// passes them; the previous value-typed declaration dereferenced its
// arguments and did not compile. Both pointers must be non-nil.
func validatekother(y, x *core.PdfObjectFloat) bool {
	inBand := *y < 378.5 && *y > 370
	return (inBand || *y < 18) && *x > 568 && *x < 608
}

func splitPdf(inputPath string, outputPath string, splitfiles int) error {

f, err := os.Open(inputPath)if err != nil {    return err}defer f.Close()pdfReader, err := pdf.NewPdfReaderLazy(f)if err != nil {    return err}fileExt := filepath.Ext(f.Name())fileName := strings.TrimSuffix(filepath.Base(f.Name()), fileExt)println(fileName)isEncrypted, err := pdfReader.IsEncrypted()if err != nil {    return err}if isEncrypted {    _, err = pdfReader.Decrypt([]byte(""))    if err != nil {        return err    }}numPages, err := pdfReader.GetNumPages()if err != nil {    return err}prefilePages := int(math.Ceil(float64(numPages) / float64(splitfiles)))println(strconv.Itoa(numPages) + " " + strconv.Itoa(prefilePages))for i := 0; i < splitfiles; i++ {    pdfWriter := pdf.NewPdfWriter()    for y := i * prefilePages; y < numPages && y < (i+1)*prefilePages; y++ {        pageNum := y + 1        println(pageNum)        page, err := pdfReader.GetPage(pageNum)        if err != nil {            return err        }        err = pdfWriter.AddPage(page)        if err != nil {            return err        }    }    outFile := outputPath + fileName + strconv.Itoa(i) + fileExt    println(outFile)    fWrite, err := os.Create(outFile)    if err != nil {        return err    }    err = pdfWriter.Write(fWrite)    fWrite.Close()    if err != nil {        return err    }}return nil

}

func mergePdf(inputFolder string, outputPath string) error {

var inputPaths []stringallFiles, err := ioutil.ReadDir(inputFolder)var fileExt stringfor _, file := range allFiles {    if !file.IsDir() {        fileFullPath := inputFolder + file.Name()        fileExt = filepath.Ext(fileFullPath)        inputPaths = append(inputPaths, fileFullPath)        println(fileFullPath)    }}pdfWriter := pdf.NewPdfWriter()for _, inputPath := range inputPaths {    f, err := os.Open(inputPath)    if err != nil {        return err    }    defer f.Close()    pdfReader, err := pdf.NewPdfReader(f)    if err != nil {        return err    }    isEncrypted, err := pdfReader.IsEncrypted()    if err != nil {        return err    }    if isEncrypted {        auth, err := pdfReader.Decrypt([]byte(""))        if err != nil {            return err        }        if !auth {            return errors.New("Cannot merge encrypted, password protected document")        }    }    numPages, err := pdfReader.GetNumPages()    if err != nil {        return err    }    for i := 0; i < numPages; i++ {        pageNum := i + 1        page, err := pdfReader.GetPage(pageNum)        if err != nil {            return err        }        err = pdfWriter.AddPage(page)        if err != nil {            return err        }    }}fWrite, err := os.Create(outputPath + "merged" + fileExt)if err != nil {    return err}defer fWrite.Close()err = pdfWriter.Write(fWrite)if err != nil {    return err}return nil

}


文章来源:智云一二三科技

文章标题:2020-05-22 golang 处理 PDF

文章地址:https://www.zhihuclub.com/627.shtml

关于作者: 智云科技

热门文章

发表评论

您的电子邮箱地址不会被公开。

网站地图