import React from 'react';
import { MainContentColumnWrapper } from '../../components/layout/Layout';
import styled from 'styled-components';
import {
  TextMedium,
  Upheader,
} from '../../components/atoms/Typography/Typography';
import BioprojectsIcon from '../../assets/images/bioprojects.inline.svg';
import SizeIcon from '../../assets/images/size.inline.svg';
import SequencesAmountIcon from '../../assets/images/sequencesAmount.inline.svg';
import CheckIcon from '../../assets/images/check.inline.svg';
import { ExternalLink } from '../../components/atoms/Link';
import { forScreenAtLeast1000pxWide } from '../../styles/mediaQueries';
import { css } from 'styled-components';
import { descriptionSectionId } from './constants';

/** One headline figure of the "Database statistics" panel. */
const databaseStatistics = [
  { Icon: BioprojectsIcon, value: '130+', caption: 'source bioprojects' },
  { Icon: SizeIcon, value: '~60GB', caption: 'database size in .fasta' },
  {
    Icon: SequencesAmountIcon,
    value: '~3B',
    caption: 'number of unique sequences',
  },
];

/**
 * Metadata fields of a FASTA entry header line, listed in the same order as
 * the comma-separated values of the worked example rendered below the list.
 */
const fastaHeaderFields: readonly string[] = [
  'V region call (detected by us)',
  'J region call (detected by us)',
  'Isotype (detected by us)',
  'CDR-H3 sequence according to IMGT',
  'Whether large part of fw1 is missing (is fw1 length less than 20)',
  'Redundancy of the sequence within bioproject',
  'Bioproject-annotated isotype info',
  'Bioproject-annotated disease state',
  'Bioproject-annotated B-cell type',
  'Internal bioproject index to distinguish between studies',
];

/** A B-cell type label together with its short glossary description. */
interface BCellType {
  name: string;
  description: string;
}

/** Glossary of the B-cell types listed in the "B-cell types" section. */
const bCellTypes: readonly BCellType[] = [
  {
    name: 'Naive',
    description: "B-cell that hasn't been previously exposed to an antigen",
  },
  {
    name: 'Memory',
    description:
      'B-cells which were produced in response to T-cell dependent antigens. They enable recognition of previously known antigens and trigger secondary immune responses',
  },
  {
    name: 'Pre',
    description: 'the last stage of B-cell development just before naive B-cell',
  },
  {
    name: 'Plasmablast',
    description:
      'immature plasma cells, which are capable of antibody production but in lesser amount than the plasma cells (short-lived effector cells)',
  },
  {
    // NOTE(review): "Germline center" may be intended as "Germinal center" —
    // confirm against the metadata vocabulary before changing the label.
    name: 'Germline center',
    description:
      'those B-cells are source of high-affinity and class switched antibodies',
  },
  {
    name: 'Regulatory',
    description:
      'type of B-cell that takes part in suppression and immunomodulation. They execute their role by secretion of anti-inflammatory cytokines (IL-10, TGF-beta etc) and Granzyme B production.',
  },
  {
    name: 'Plasma cells',
    description:
      'type of long-lived B-cell that produces antibody after being presented to a specific antigen',
  },
  {
    name: 'Follicular',
    description:
      'freely recirculating B-cells that home to the lymphoid follicles in the secondary lymphoid organs',
  },
  {
    name: 'ProB - PreB',
    description:
      'the first stage of B-cell development just after CLP (common lymphoid progenitor)',
  },
  {
    name: 'Transitional',
    description:
      'B-cells that are bone marrow-derived, immature B-cells, which are also considered to be precursors of mature B-cells',
  },
  {
    name: 'Marginal zone',
    description:
      'B-cells occurring in the marginal (peripheral) zone of the lymph node',
  },
  {
    name: 'Mature',
    description: 'B-cell that has been exposed to an antigen',
  },
];

/**
 * Static, prop-less description section of the page.
 *
 * Renders, in order: the introduction, the database statistics panel, the
 * FASTA header format (field list plus a worked example and a sample-file
 * download link), the B-cell type glossary and the citation block. The root
 * element carries `descriptionSectionId` as its DOM id.
 *
 * Repeated markup (statistics and both check-marked lists) is generated from
 * the data arrays declared above, so copy changes are made in one place.
 */
const DescriptionSection = (): JSX.Element => {
  return (
    <Wrapper id={descriptionSectionId}>
      <InnerWrapper>
        <MainContentColumnWrapper>
          <TextWrapper>
            <TitleText $color="secondary" $textAlign="center" $fontWeight="700">
              Introduction
            </TitleText>
            <BaseText $color="delicateAccented">
              Immunoglobulins are a highly versatile type of protein, boasting
              estimated theoretical diversity close to{' '}
              <BaseText $color="secondary" $fontWeight="700" as="span">
                10<sup>18</sup>
              </BaseText>
              . Next-generation sequencing studies now allow us to sample
              portions of this diversity to create large datasets to facilitate
              novel immunological studies but also for machine learning
              applications such as training large language models.
            </BaseText>
            <BaseText $color="delicateAccented">
              We automatically mined the Sequence Read Archive repository for
              depositions containing immunoglobulin sequences. We found more
              than{' '}
              <BaseText $color="secondary" $fontWeight="700" as="span">
                220 bioprojects
              </BaseText>{' '}
              across multiple disease states. We annotated the{' '}
              <BaseText $color="secondary" $fontWeight="700" as="span">
                ~11,000 biosamples
              </BaseText>{' '}
              associated with these and processed the sequences using a uniform
              pipeline. Of these, more than 130 are bioprojects containing human
              immunoglobulin sequences and we make these available in a public
              version accompanying our manuscript.
            </BaseText>
          </TextWrapper>
          <StatisticsWrapper>
            <Upheader $color="delicateAccented">Database statistics</Upheader>
            <StatisticsContentWrapper>
              {databaseStatistics.map(({ Icon, value, caption }) => (
                <StatisticWrapper key={caption}>
                  <Icon />
                  <StatisticTextWrapper>
                    <StatisticText $color="secondary" $fontWeight="700">
                      {value}
                    </StatisticText>
                    <BaseText $color="delicateAccented">{caption}</BaseText>
                  </StatisticTextWrapper>
                </StatisticWrapper>
              ))}
            </StatisticsContentWrapper>
          </StatisticsWrapper>
          <TextWrapper>
            <BaseText $color="delicateAccented">
              The entire database of 220 bioprojects in .airr format was more
              than 1TB-compressed, making it inefficient for most practical
              applications. To facilitate public access, we make a subset of
              human bioprojects available to accompany our publication, with
              sequences and metadata annotations in .fasta format. The entire
              dataset contains approximately{' '}
              <BaseText $color="secondary" $fontWeight="700" as="span">
                3B unique sequences, comprising heavy & light sequences, and is
                a more manageable 60GB in size.
              </BaseText>
            </BaseText>
            <BaseText $color="delicateAccented">
              Fasta file sequence entry has the following metadata in its header
              line:
            </BaseText>
            <List>
              {fastaHeaderFields.map((field) => (
                <ListItem key={field}>
                  <CheckIcon />
                  <TextMedium $color="tertiary" $fontWeight="700">
                    {field}
                  </TextMedium>
                </ListItem>
              ))}
            </List>
          </TextWrapper>
          <TextWrapper>
            <FormulaWrapper>
              <BaseText $color="delicateAccented">
                For instance in the following example:
              </BaseText>
              <Formula>
                <BaseText $color="secondary" $fontWeight="700">
                  IGHV4-59*05,IGHJ4*02,IGHM,ALTWIQLWLAPHSFDY,True,1.0,None,healthy,naive,30
                </BaseText>
              </Formula>
            </FormulaWrapper>
            <BaseText $color="delicateAccented">
              The V call is IGHV4-59*05, the J call is IGHJ4*02, we detected
              this isotype as IGHM, the CDR-H3 sequence is ALTWIQLWLAPHSFDY, the
              fw1 is largely incomplete (True), there was only one copy of the
              sequence in the bioproject, there was no bioproject isotype
              annotation, the disease state was {'"'}healthy{'"'}, the B-cell
              type was {'"'}naive{'"'}, and the bioproject index is 30. Sample
              from the data is given here:
            </BaseText>
            <ExternalLinkButton
              href="/files/sampleNGS.fasta"
              download
              $color="tertiary"
            >
              Download sample data
            </ExternalLinkButton>
          </TextWrapper>
        </MainContentColumnWrapper>
      </InnerWrapper>
      <InnerWrapper useGreyBg>
        <MainContentColumnWrapper>
          <TextWrapper>
            <TitleText $color="secondary" $fontWeight="700" $textAlign="center">
              B-cell types
            </TitleText>
            <BaseText $color="delicateAccented">
              We distinguish the following B-cell types in our metadata:
            </BaseText>
            <List useOneColumn>
              {bCellTypes.map(({ name, description }) => (
                <ListItem key={name}>
                  <CheckIcon />
                  <TextMedium $color="delicateAccented">
                    <TextMedium $color="tertiary" $fontWeight="700" as="span">
                      {`${name}:`}
                    </TextMedium>{' '}
                    {description}
                  </TextMedium>
                </ListItem>
              ))}
            </List>
          </TextWrapper>
        </MainContentColumnWrapper>
      </InnerWrapper>
      <InnerWrapper>
        <MainContentColumnWrapper>
          <TextWrapper>
            <TitleText $color="secondary" $fontWeight="700" $textAlign="center">
              Citing this work
            </TitleText>
            <BaseText $color="delicateAccented">
              We make the public version of ABNGS database available as a
              companion to our paper on convergence between natural and
              therapeutic antibodies. If you use this work please cite the
              following paper:
            </BaseText>
            <SummaryWrapper>
              <SummaryText $color="secondary" $fontWeight="700">
                Large-scale data mining of four billion human antibody variable
                regions reveals convergence between therapeutic and natural
                antibodies that constrains search space for biologics drug
                discovery.
              </SummaryText>
              <TextMedium $color="delicateAccented">
                Pawel Dudzic, Dawid Chomicz, Jarosław Kończak, Tadeusz Satława,
                Bartosz Janusz, Sonia Wrobel, Tomasz Gawłowski, Igor
                Jaszczyszyn, Weronika Bielska, Samuel Demharter, Roberto
                Spreafico, Lukas Schulte, Kyle Martin, Stephen R. Comeau, Konrad
                Krawczyk
              </TextMedium>
              <SummaryLabel>
                <SummaryLabelText
                  $color="tertiary"
                  $fontWeight="700"
                  $uppercase
                >
                  Under review
                </SummaryLabelText>
              </SummaryLabel>
            </SummaryWrapper>
          </TextWrapper>
        </MainContentColumnWrapper>
      </InnerWrapper>
    </Wrapper>
  );
};

/**
 * Root container of the section: relatively positioned, painted with the
 * theme's secondary background and given a large bottom padding (400px,
 * overridden to 320px inside the `forScreenAtLeast1000pxWide` helper).
 *
 * The `:before` pseudo-element is a 10px-tall, horizontally centred strip
 * (at most 1134px wide) placed directly above the container and filled with
 * the same background — presumably to cover a seam with the preceding
 * section; confirm against the page layout.
 */
const Wrapper = styled.div`
  position: relative;
  background: ${({ theme }) => theme.colors.backgrounds.secondary};
  padding: 0 0 400px;
  ${forScreenAtLeast1000pxWide(css`
    padding: 0 0 320px;
  `)};
  &:before {
    content: '';
    position: absolute;
    left: 50%;
    top: -10px;
    transform: translateX(-50%);
    height: 10px;
    width: 100%;
    max-width: 1134px;
    background: ${({ theme }) => theme.colors.backgrounds.secondary};
  }
`;

/**
 * Full-width band with 60px vertical padding. `useGreyBg` switches its
 * background from the theme's secondary colour to the quaternary one.
 *
 * NOTE(review): `useGreyBg` is not a transient (`$`-prefixed) prop, unlike the
 * `$color`/`$fontWeight` props used elsewhere in this file; depending on the
 * styled-components version it may be forwarded to the DOM node — confirm.
 */
const InnerWrapper = styled.div<{ useGreyBg?: boolean }>`
  background: ${({ theme, useGreyBg }) => {
    const { quaternary, secondary } = theme.colors.backgrounds;
    return useGreyBg ? quaternary : secondary;
  }};
  padding: 60px 0;
`;

/**
 * Vertical stack of text blocks separated by a 30px row gap. Every instance
 * except the last child of its parent gets a 60px bottom margin, and 180px of
 * horizontal padding is applied inside `forScreenAtLeast1000pxWide`.
 */
const TextWrapper = styled.div`
  display: flex;
  flex-direction: column;
  row-gap: 30px;
  ${forScreenAtLeast1000pxWide(css`
    padding: 0 180px;
  `)}
  &:not(:last-child) {
    margin: 0 0 60px;
  }
`;

/** Section heading text: `TextMedium` with the font size overridden to 28px. */
const TitleText = styled(TextMedium)`
  font-size: 28px;
`;

/**
 * Body copy text: `TextMedium` with the font size overridden to 16px.
 * Also rendered inline via `as="span"` for highlighted fragments, and
 * targeted as a nested selector by `Formula`.
 */
const BaseText = styled(TextMedium)`
  font-size: 16px;
`;

/**
 * Panel holding the statistics header and figures: a centred column with a
 * 40px row gap on the theme's tertiary background. By default it has -30px
 * horizontal margins with matching 30px horizontal padding; inside
 * `forScreenAtLeast1000pxWide` the horizontal margins are reset to 0 and the
 * horizontal padding becomes 20px.
 */
const StatisticsWrapper = styled.div`
  display: flex;
  flex-direction: column;
  align-items: center;
  row-gap: 40px;
  background: ${({ theme }) => theme.colors.backgrounds.tertiary};
  margin: 0 -30px 60px;
  padding: 40px 30px 60px;
  ${forScreenAtLeast1000pxWide(css`
    margin: 0 0 60px;
    padding: 40px 20px 60px;
  `)}
`;

/**
 * Lays the individual statistics out as a column (40px row gap) by default
 * and as a row (80px column gap, no row gap) inside
 * `forScreenAtLeast1000pxWide`.
 */
const StatisticsContentWrapper = styled.div`
  display: flex;
  row-gap: 40px;
  flex-direction: column;
  ${forScreenAtLeast1000pxWide(css`
    row-gap: 0;
    column-gap: 80px;
    flex-direction: row;
  `)}
`;

/** Single statistic: icon and text side by side, vertically centred, 10px apart. */
const StatisticWrapper = styled.div`
  display: flex;
  column-gap: 10px;
  align-items: center;
`;

/** Unstyled container grouping a statistic's value with its caption. */
const StatisticTextWrapper = styled.div``;

/** Large statistic value: `TextMedium` at 48px with a tight 0.9 line height. */
const StatisticText = styled(TextMedium)`
  line-height: 0.9;
  font-size: 48px;
`;

/** Props accepted by `List`. */
interface ListProps {
  /** When set, the wide-screen grid uses one column instead of two. */
  useOneColumn?: boolean;
}

/**
 * Grid-based `<ul>` without default margin/padding (20px row gap, 80px column
 * gap). Inside `forScreenAtLeast1000pxWide` it declares explicit equal-width
 * columns: one when `useOneColumn` is set, otherwise two.
 */
const List = styled.ul<ListProps>`
  display: grid;
  align-items: flex-start;
  row-gap: 20px;
  column-gap: 80px;
  margin: 0;
  padding: 0;
  ${forScreenAtLeast1000pxWide(css<ListProps>`
    grid-template-columns: repeat(
      ${({ useOneColumn }) => (useOneColumn ? 1 : 2)},
      1fr
    );
  `)}
`;

/**
 * List entry laid out as a vertically centred flex row with a 20px gap.
 * Any nested `svg` is prevented from shrinking.
 */
const ListItem = styled.li`
  display: flex;
  column-gap: 20px;
  align-items: center;

  svg {
    flex-shrink: 0;
  }
`;

/**
 * Column that stacks the example caption and the example box 10px apart,
 * aligning both to the start so the box is not stretched to full width.
 */
const FormulaWrapper = styled.div`
  display: flex;
  flex-direction: column;
  row-gap: 10px;
  align-items: flex-start;
`;

/**
 * Highlight box for the example header line: quaternary theme background with
 * 4px/10px padding. Nested `BaseText` uses `word-break: break-word`.
 */
const Formula = styled.div`
  background: ${({ theme }) => theme.colors.backgrounds.quaternary};
  padding: 4px 10px;

  ${BaseText} {
    word-break: break-word;
  }
`;

/**
 * Citation block: a start-aligned column (5px row gap) with a 5px left border
 * in the theme's tertiary background colour and 35px of left padding.
 */
const SummaryWrapper = styled.div`
  display: flex;
  flex-direction: column;
  align-items: flex-start;
  row-gap: 5px;
  border-left: 5px solid ${({ theme }) => theme.colors.backgrounds.tertiary};
  padding: 10px 0 10px 35px;
`;

/** Cited paper title: `TextMedium` with the font size overridden to 20px. */
const SummaryText = styled(TextMedium)`
  font-size: 20px;
`;

/**
 * Pill-shaped badge (50px border radius, fixed `#efe4bb` background) placed
 * 15px below the preceding content, with 15px of horizontal padding.
 */
const SummaryLabel = styled.div`
  background: #efe4bb;
  border-radius: 50px;
  margin: 15px 0 0;
  padding: 0 15px;
`;

/** Badge caption: `TextMedium` at 10px with 0.1em letter spacing. */
const SummaryLabelText = styled(TextMedium)`
  letter-spacing: 0.1em;
  font-size: 10px;
`;

/**
 * `ExternalLink` styled as a centred, pill-shaped outline button. The border
 * uses the theme's `secondaryAccented` accent; on hover the button is filled
 * with that accent and the text switches to the theme's primary text colour,
 * animated over 0.3s.
 */
const ExternalLinkButton = styled(ExternalLink)`
  align-self: center;
  padding: 15px 25px;
  border-radius: 50px;
  border: 1px solid ${({ theme }) => theme.colors.accents.secondaryAccented};
  font-weight: 700;
  transition: 0.3s;
  &:hover {
    color: ${({ theme }) => theme.colors.texts.primary};
    background: ${({ theme }) => theme.colors.accents.secondaryAccented};
  }
`;

export default DescriptionSection;
