Parser.GetText Method (Int32, TextOptions)
Namespace: GroupDocs.Parser
The following example shows how to extract raw text from a document page:
// Example: print the raw text of every page of a document.
// `filePath` is expected to be defined by the surrounding code.

// Create an instance of the Parser class; the using statement disposes it at the end.
using (Parser parser = new Parser(filePath))
{
    // Check if the document supports text extraction
    if (!parser.Features.Text)
    {
        Console.WriteLine("Document isn't supports text extraction.");
        return;
    }

    // Get the document info
    DocumentInfo documentInfo = parser.GetDocumentInfo() as DocumentInfo;

    // Check if the document has pages (the `as` cast yields null when the
    // returned info object is not a DocumentInfo, so that case is handled too)
    if (documentInfo == null || documentInfo.RawPageCount == 0)
    {
        Console.WriteLine("Document hasn't pages.");
        return;
    }

    // Iterate over pages; GetText is called with a zero-based page index
    for (int p = 0; p < documentInfo.RawPageCount; p++)
    {
        // Print a one-based page number together with the total page count
        Console.WriteLine("Page {0}/{1}", p + 1, documentInfo.RawPageCount);

        // Extract the page text into a reader.
        // NOTE(review): TextOptions(true) presumably selects raw mode, matching
        // the "raw text" wording of this example - confirm against the API reference.
        using (TextReader reader = parser.GetText(p, new TextOptions(true)))
        {
            // Print the text of the page.
            // Null-checking is skipped because text extraction support
            // has already been verified above.
            Console.WriteLine(reader.ReadToEnd());
        }
    }
}