Определение ориентации изображения документа в .NET
3 февраля 2022
/// <summary>
/// Detects the text orientation of every page in a document image file using
/// statistics for Latin symbols and writes one result line per page to the console.
/// </summary>
/// <param name="filename">The path to a file with document image.</param>
/// <remarks>
/// The method returns nothing; for each page it prints the file name, the zero-based
/// page index, the detected orientation and the confidence of the detection.
/// </remarks>
public static void GetDocumentImageOrientationUsingLatinSymbolStat(string filename)
{
    // create an image collection
    using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
    {
        try
        {
            // add images from file to the image collection
            images.Add(filename);

            // create the command that determines orientation using statistics for Latin symbols
            Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand getTextOrientationCommand1 =
                new Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand();

            // the short file name is the same for every page, so compute it once
            string shortFilename = System.IO.Path.GetFileName(filename);

            // for each image in image collection
            for (int i = 0; i < images.Count; i++)
            {
                // get image
                Vintasoft.Imaging.VintasoftImage image = images[i];

                // determine orientation of document image using statistics for Latin symbols
                getTextOrientationCommand1.ExecuteInPlace(image);

                // write result to the console
                System.Console.WriteLine(string.Format(
                    "Filename: {0}, page: {1}, page orientation: {2}, confidence: {3}",
                    shortFilename,
                    i,
                    getTextOrientationCommand1.Orientation,
                    getTextOrientationCommand1.Confidence));
            }
        }
        finally
        {
            // free images even if loading or processing throws; otherwise the
            // images already added to the collection would be left undisposed
            images.ClearAndDisposeItems();
        }
    }
}
/// <summary>
/// Detects the text orientation of every page in a document image file using
/// Tesseract OCR and writes one result line per page to the console.
/// </summary>
/// <param name="filename">The path to a file with document image.</param>
/// <param name="tesseractOcrDllDirectory">A path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located.</param>
/// <remarks>
/// The method returns nothing; for each page it prints the file name, the zero-based
/// page index and the detected orientation.
/// </remarks>
public static void GetDocumentImageOrientationUsingTesseractOCR(string filename, string tesseractOcrDllDirectory)
{
    // create an image collection
    using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
    {
        try
        {
            // add images from file to the image collection
            images.Add(filename);

            // create the command that determines orientation using Tesseract OCR
            using (Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand getTextOrientationCommand =
                new Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand())
            {
                // specify path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located
                getTextOrientationCommand.TesseractOcrDllDirectory = tesseractOcrDllDirectory;

                // the short file name is the same for every page, so compute it once
                string shortFilename = System.IO.Path.GetFileName(filename);

                // for each image in image collection
                for (int i = 0; i < images.Count; i++)
                {
                    // get image
                    Vintasoft.Imaging.VintasoftImage image = images[i];

                    // determine orientation of document image using Tesseract OCR
                    getTextOrientationCommand.ExecuteInPlace(image);

                    // write result to the console
                    System.Console.WriteLine(string.Format(
                        "Filename: {0}, page: {1}, page orientation: {2}",
                        shortFilename,
                        i,
                        getTextOrientationCommand.Orientation));
                }
            }
        }
        finally
        {
            // free images even if loading or processing throws; otherwise the
            // images already added to the collection would be left undisposed
            images.ClearAndDisposeItems();
        }
    }
}
/// <summary>
/// Detects the text orientation of every page in a document image file and writes
/// one result line per page to the console. Statistics for Latin symbols are tried
/// first; Tesseract OCR is used as a fallback when that result is undefined or its
/// confidence is not greater than 0.3.
/// </summary>
/// <param name="filename">The path to a file with document image.</param>
/// <param name="tesseractOcrDllDirectory">A path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located.</param>
/// <remarks>
/// The method returns nothing; for each page it prints the file name, the zero-based
/// page index, the detected orientation and the confidence of the command whose
/// result was used.
/// </remarks>
public static void GetDocumentImageOrientationUsingLatinSymbolStatAndOcrTesseract(string filename, string tesseractOcrDllDirectory)
{
    // the Latin-symbol statistics result is accepted only when its confidence exceeds this value
    const double MinLatinStatConfidence = 0.3;

    // create an image collection
    using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
    {
        try
        {
            // add images from file to the image collection
            images.Add(filename);

            // create the command that determines orientation using statistics for Latin symbols
            Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand getTextOrientationCommand1 =
                new Vintasoft.Imaging.ImageProcessing.Info.GetTextOrientationCommand();

            // create the command that determines orientation using Tesseract OCR
            using (Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand getTextOrientationCommand2 =
                new Vintasoft.Imaging.ImageProcessing.Ocr.Tesseract.GetTesseractOcrTextOrientationCommand())
            {
                // specify path to a directory, where Tesseract5.Vintasoft.xXX.dll files are located
                getTextOrientationCommand2.TesseractOcrDllDirectory = tesseractOcrDllDirectory;

                // for each image in image collection
                for (int i = 0; i < images.Count; i++)
                {
                    // get image
                    Vintasoft.Imaging.VintasoftImage image = images[i];

                    // determine orientation of document image using statistics for Latin symbols
                    // (works for Latin text only)
                    getTextOrientationCommand1.ExecuteInPlace(image);

                    // values of whichever command ends up providing the answer
                    object orientation;
                    object confidence;

                    // if orientation is detected and orientation result has high confidence
                    if (getTextOrientationCommand1.Orientation != Vintasoft.Imaging.ImageProcessing.Info.ImageOrthogonalOrientation.Undefined &&
                        getTextOrientationCommand1.Confidence > MinLatinStatConfidence)
                    {
                        orientation = getTextOrientationCommand1.Orientation;
                        confidence = getTextOrientationCommand1.Confidence;
                    }
                    // if orientation is not detected or orientation result has low confidence
                    else
                    {
                        // determine orientation of document image using Tesseract OCR (works for any text)
                        getTextOrientationCommand2.ExecuteInPlace(image);

                        orientation = getTextOrientationCommand2.Orientation;
                        confidence = getTextOrientationCommand2.Confidence;
                    }

                    // write result to the console
                    System.Console.WriteLine(string.Format(
                        "Filename: {0}, page: {1}, page orientation: {2}, confidence: {3}",
                        System.IO.Path.GetFileName(filename),
                        i,
                        orientation,
                        confidence));
                }
            }
        }
        finally
        {
            // free images even if loading or processing throws; otherwise the
            // images already added to the collection would be left undisposed
            images.ClearAndDisposeItems();
        }
    }
}