scantools
1.0.7
Graphics manipulation with a view towards scanned documents
|
20 #define HOCRDOCUMENT 1
23 #include "HOCRTextBox.h"
24 #include "resolution.h"
75 HOCRDocument(
const QImage &image, QStringList languages=QStringList()) {
read(image,languages);};
88 bool hasError()
const {
return !_error.isEmpty();};
96 QString
error()
const {
return _error; };
111 QSet<QString>
warnings()
const {
return _warnings;};
119 QSet<QString>
system()
const {
return _OCRSystem;};
136 QList<HOCRTextBox>
pages()
const {
return _pages;};
142 bool isEmpty()
const {
return _pages.isEmpty();};
163 if (_pages.size() > 0)
164 return _pages.takeFirst();
191 void read(
const QString& fileName);
210 void read(
const QImage &image,
const QStringList& languages=QStringList());
248 QString
toPDF(
const QString& fileName,
resolution _resolution,
const QString& title=QString(),
const QPageSize& overridePageSize=QPageSize(), QFont *overrideFont=0)
const;
264 QList<QImage>
toImages(QFont *overrideFont=0, QImage::Format format=QImage::Format_Grayscale8)
const;
308 QPageSize findPageSize(
int pageNumber,
resolution _resolution,
const QPageSize &overridePageSize)
const;
315 QSet<QString> _OCRSystem;
319 QSet<QString> _OCRCapabilities;
322 QList<HOCRTextBox> _pages;
325 QSet<QString> _warnings;
bool isEmpty() const
Returns true if the document contains no pages.
bool hasText() const
Check if the document contains text.
QList< QImage > toImages(QFont *overrideFont=0, QImage::Format format=QImage::Format_Grayscale8) const
Export to images.
bool hasWarnings() const
Warning status.
HOCRDocument(QString fileName)
Constructs an HOCR document from a file.
QSet< QString > warnings() const
Warning messages.
void read(QIODevice *device)
Reads an HOCR document from a QIODevice.
void append(const HOCRDocument &other)
Appends other HOCRDocument.
HOCRDocument(const QImage &image, QStringList languages=QStringList())
Constructs an HOCR document by running the tesseract OCR engine.
QList< HOCRTextBox > pages() const
Pages in the document.
void read(const QImage &image, const QStringList &languages=QStringList())
Generates an HOCR document by running the tesseract OCR engine.
QString toPDF(const QString &fileName, resolution _resolution, const QString &title=QString(), const QPageSize &overridePageSize=QPageSize(), QFont *overrideFont=0) const
Export to PDF.
static bool areLanguagesSupportedByTesseract(const QStringList &lingos)
Check if languages are supported by tesseract.
static QStringList tesseractLanguages()
List of languages supported by tesseract.
Text box, as defined in an HOCR file.
QSet< QString > system() const
System(s) that generated this file.
The resolution class stores a resolution and converts between units.
void clear()
Resets the document.
HOCRTextBox takeFirstPage()
Removes the first page of the document and returns it.
QSet< QString > capabilities() const
OCR capabilities.
HOCRDocument(QIODevice *device)
Constructs an HOCR document from a QIODevice.
QString toText() const
Export this document as text.
QString error() const
Error message.
QFont suggestFont() const
Suggest font.
HOCRDocument()
Constructs an empty HOCR document.
Reads and interprets HOCR files, the standard output file format for Optical Character Recognition sy...
bool hasError() const
Error status.
void read(const QString &fileName)
Reads an HOCR document from a file.