Source code for fonduer.parser.models.paragraph

from sqlalchemy import Column, ForeignKey, Integer, String, UniqueConstraint
from sqlalchemy.orm import backref, relationship

from fonduer.parser.models.context import Context


class Paragraph(Context):
    """A paragraph ``Context`` belonging to a ``Document``.

    A paragraph groups together a run of adjacent sentences.

    .. note:: As of v0.6.2, text content held in either of the two lxml
        properties ``.text`` and ``.tail`` is turned into a ``Paragraph``.
        See https://lxml.de/tutorial.html#elements-contain-text for details
        about the ``.text`` and ``.tail`` properties.
    """

    __tablename__ = "paragraph"

    #: Unique id of this ``Paragraph``; also a foreign key to ``context.id``.
    id = Column(
        Integer,
        ForeignKey("context.id", ondelete="CASCADE"),
        primary_key=True,
    )

    #: Position of this ``Paragraph`` within its ``Document``.
    position = Column(Integer, nullable=False)

    #: Name of this ``Paragraph`` (optional, not required to be unique).
    name = Column(String, unique=False, nullable=True)

    #: Id of the ``Document`` this paragraph belongs to.
    document_id = Column(Integer, ForeignKey("document.id"))

    #: The ``Document`` this paragraph belongs to. The reverse side,
    #: ``Document.paragraphs``, is ordered by ``position``.
    document = relationship(
        "Document",
        backref=backref(
            "paragraphs",
            order_by=position,
            cascade="all, delete-orphan",
        ),
        foreign_keys=document_id,
    )

    #: Id of the ``Section`` this paragraph belongs to.
    section_id = Column(Integer, ForeignKey("section.id"))

    #: The ``Section`` this paragraph belongs to.
    section = relationship(
        "Section",
        backref=backref("paragraphs", cascade="all, delete-orphan"),
        foreign_keys=section_id,
    )

    #: Id of the enclosing ``Cell``, if there is one.
    cell_id = Column(Integer, ForeignKey("cell.id"))

    #: The enclosing ``Cell``, if there is one.
    cell = relationship(
        "Cell",
        backref=backref("paragraphs", cascade="all, delete-orphan"),
        foreign_keys=cell_id,
    )

    #: Id of the enclosing ``Caption``, if there is one.
    caption_id = Column(Integer, ForeignKey("caption.id"))

    #: The enclosing ``Caption``, if there is one.
    caption = relationship(
        "Caption",
        backref=backref("paragraphs", cascade="all, delete-orphan"),
        foreign_keys=caption_id,
    )

    __mapper_args__ = {"polymorphic_identity": "paragraph"}

    # A position may appear only once per document.
    __table_args__ = (UniqueConstraint(document_id, position),)

    def __repr__(self) -> str:
        """Return a one-line description locating the paragraph.

        The result always names the document, the section position and the
        paragraph position. When the paragraph sits inside a ``Cell`` the
        cell position is included; otherwise, when it sits inside a
        ``Caption``, the caption position is included. A cell takes
        precedence over a caption.
        """
        # Decide which optional parent (if any) is reported, cell first.
        if self.cell:
            parent_label, parent = "Cell", self.cell
        elif self.caption:
            parent_label, parent = "Caption", self.caption
        else:
            parent_label, parent = None, None

        fields = [
            f"Doc: {self.document.name}",
            f"Sec: {self.section.position}",
        ]
        if parent is not None:
            fields.append(f"{parent_label}: {parent.position}")
        fields.append(f"Pos: {self.position}")

        return "Paragraph(" + ", ".join(fields) + ")"

    def __gt__(self, other: "Paragraph") -> bool:
        """Order paragraphs by comparing their string representations."""
        # Sorting is defined purely on the ``__repr__`` text of both sides.
        own_text = self.__repr__()
        other_text = other.__repr__()
        return own_text > other_text