[BIG] pytorch-transformers => transformers

thomwolf committed Sep 26, 2019
1 parent 2f071fc commit 31c23bd5ee26425a67f92fc170789656379252a6
Showing with 540 additions and 539 deletions.
  1. +11 βˆ’11 .circleci/config.yml
  2. +1 βˆ’1 .coveragerc
  3. +1 βˆ’1 .github/ISSUE_TEMPLATE/migration.md
  4. +24 βˆ’24 README.md
  5. +1 βˆ’1 docker/Dockerfile
  6. +1 βˆ’1 docs/source/_static/js/custom.js
  7. +1 βˆ’1 docs/source/bertology.rst
  8. +6 βˆ’6 docs/source/conf.py
  9. +7 βˆ’7 docs/source/converting_tensorflow_models.rst
  10. +3 βˆ’3 docs/source/index.rst
  11. +6 βˆ’6 docs/source/installation.rst
  12. +1 βˆ’1 docs/source/main_classes/configuration.rst
  13. +1 βˆ’1 docs/source/main_classes/model.rst
  14. +6 βˆ’6 docs/source/main_classes/optimizer_schedules.rst
  15. +1 βˆ’1 docs/source/main_classes/tokenizer.rst
  16. +7 βˆ’7 docs/source/migration.md
  17. +3 βˆ’3 docs/source/model_doc/auto.rst
  18. +10 βˆ’10 docs/source/model_doc/bert.rst
  19. +6 βˆ’6 docs/source/model_doc/distilbert.rst
  20. +5 βˆ’5 docs/source/model_doc/gpt.rst
  21. +5 βˆ’5 docs/source/model_doc/gpt2.rst
  22. +5 βˆ’5 docs/source/model_doc/roberta.rst
  23. +4 βˆ’4 docs/source/model_doc/transformerxl.rst
  24. +6 βˆ’6 docs/source/model_doc/xlm.rst
  25. +6 βˆ’6 docs/source/model_doc/xlnet.rst
  26. +4 βˆ’4 docs/source/notebooks.rst
  27. +4 βˆ’4 docs/source/pretrained_models.rst
  28. +5 βˆ’5 docs/source/quickstart.md
  29. +2 βˆ’2 docs/source/serialization.rst
  30. +2 βˆ’2 docs/source/torchscript.rst
  31. +4 βˆ’4 examples/README.md
  32. +1 βˆ’1 examples/contrib/run_openai_gpt.py
  33. +4 βˆ’4 examples/contrib/run_swag.py
  34. +1 βˆ’1 examples/contrib/run_transfo_xl.py
  35. +2 βˆ’2 examples/distillation/README.md
  36. +1 βˆ’1 examples/distillation/distiller.py
  37. +1 βˆ’1 examples/distillation/scripts/binarized_data.py
  38. +1 βˆ’1 examples/distillation/scripts/extract_for_distil.py
  39. +2 βˆ’2 examples/distillation/train.py
  40. +1 βˆ’1 examples/run_bertology.py
  41. +5 βˆ’5 examples/run_generation.py
  42. +8 βˆ’8 examples/run_glue.py
  43. +3 βˆ’3 examples/run_lm_finetuning.py
  44. +5 βˆ’5 examples/run_multiple_choice.py
  45. +4 βˆ’4 examples/run_squad.py
  46. +1 βˆ’1 examples/run_tf_glue.py
  47. +1 βˆ’1 examples/utils_squad.py
  48. +25 βˆ’25 hubconf.py
  49. +5 βˆ’5 setup.py
  50. +2 βˆ’2 {pytorch_transformers β†’ transformers}/__init__.py
  51. +16 βˆ’16 {pytorch_transformers β†’ transformers}/__main__.py
  52. +2 βˆ’2 {pytorch_transformers β†’ transformers}/configuration_auto.py
  53. +1 βˆ’1 {pytorch_transformers β†’ transformers}/configuration_bert.py
  54. 0 {pytorch_transformers β†’ transformers}/configuration_distilbert.py
  55. 0 {pytorch_transformers β†’ transformers}/configuration_gpt2.py
  56. 0 {pytorch_transformers β†’ transformers}/configuration_openai.py
  57. 0 {pytorch_transformers β†’ transformers}/configuration_roberta.py
  58. 0 {pytorch_transformers β†’ transformers}/configuration_transfo_xl.py
  59. +3 βˆ’3 {pytorch_transformers β†’ transformers}/configuration_utils.py
  60. 0 {pytorch_transformers β†’ transformers}/configuration_xlm.py
  61. 0 {pytorch_transformers β†’ transformers}/configuration_xlnet.py
  62. +1 βˆ’1 {pytorch_transformers β†’ transformers}/convert_bert_original_tf_checkpoint_to_pytorch.py
  63. +1 βˆ’1 {pytorch_transformers β†’ transformers}/convert_bert_pytorch_checkpoint_to_original_tf.py
  64. +1 βˆ’1 {pytorch_transformers β†’ transformers}/convert_gpt2_original_tf_checkpoint_to_pytorch.py
  65. +1 βˆ’1 {pytorch_transformers β†’ transformers}/convert_openai_original_tf_checkpoint_to_pytorch.py
  66. +3 βˆ’3 {pytorch_transformers β†’ transformers}/convert_pytorch_checkpoint_to_tf2.py
  67. +2 βˆ’2 {pytorch_transformers β†’ transformers}/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
  68. +4 βˆ’4 {pytorch_transformers β†’ transformers}/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
  69. +2 βˆ’2 {pytorch_transformers β†’ transformers}/convert_xlm_original_pytorch_checkpoint_to_pytorch.py
  70. +1 βˆ’1 {pytorch_transformers β†’ transformers}/convert_xlnet_original_tf_checkpoint_to_pytorch.py
  71. 0 {pytorch_transformers β†’ transformers}/data/__init__.py
  72. 0 {pytorch_transformers β†’ transformers}/data/metrics/__init__.py
  73. 0 {pytorch_transformers β†’ transformers}/data/processors/__init__.py
  74. 0 {pytorch_transformers β†’ transformers}/data/processors/glue.py
  75. 0 {pytorch_transformers β†’ transformers}/data/processors/utils.py
  76. +5 βˆ’4 {pytorch_transformers β†’ transformers}/file_utils.py
  77. +24 βˆ’24 {pytorch_transformers β†’ transformers}/modeling_auto.py
  78. +5 βˆ’5 {pytorch_transformers β†’ transformers}/modeling_bert.py
  79. +2 βˆ’2 {pytorch_transformers β†’ transformers}/modeling_distilbert.py
  80. +7 βˆ’7 {pytorch_transformers β†’ transformers}/modeling_gpt2.py
  81. +5 βˆ’5 {pytorch_transformers β†’ transformers}/modeling_openai.py
  82. +7 βˆ’7 {pytorch_transformers β†’ transformers}/modeling_roberta.py
  83. +24 βˆ’24 {pytorch_transformers β†’ transformers}/modeling_tf_auto.py
  84. +13 βˆ’13 {pytorch_transformers β†’ transformers}/modeling_tf_bert.py
  85. +6 βˆ’6 {pytorch_transformers β†’ transformers}/modeling_tf_distilbert.py
  86. +8 βˆ’8 {pytorch_transformers β†’ transformers}/modeling_tf_gpt2.py
  87. +8 βˆ’8 {pytorch_transformers β†’ transformers}/modeling_tf_openai.py
  88. +2 βˆ’2 {pytorch_transformers β†’ transformers}/modeling_tf_pytorch_utils.py
  89. +7 βˆ’7 {pytorch_transformers β†’ transformers}/modeling_tf_roberta.py
  90. +7 βˆ’7 {pytorch_transformers β†’ transformers}/modeling_tf_transfo_xl.py
  91. 0 {pytorch_transformers β†’ transformers}/modeling_tf_transfo_xl_utilities.py
  92. +9 βˆ’9 {pytorch_transformers β†’ transformers}/modeling_tf_utils.py
  93. +9 βˆ’9 {pytorch_transformers β†’ transformers}/modeling_tf_xlm.py
  94. +9 βˆ’9 {pytorch_transformers β†’ transformers}/modeling_tf_xlnet.py
  95. +5 βˆ’5 {pytorch_transformers β†’ transformers}/modeling_transfo_xl.py
  96. 0 {pytorch_transformers β†’ transformers}/modeling_transfo_xl_utilities.py
  97. +12 βˆ’12 {pytorch_transformers β†’ transformers}/modeling_utils.py
  98. +6 βˆ’6 {pytorch_transformers β†’ transformers}/modeling_xlm.py
  99. +5 βˆ’5 {pytorch_transformers β†’ transformers}/modeling_xlnet.py
  100. 0 {pytorch_transformers β†’ transformers}/optimization.py
  101. 0 {pytorch_transformers β†’ transformers}/tests/__init__.py
  102. 0 {pytorch_transformers β†’ transformers}/tests/configuration_common_test.py
  103. 0 {pytorch_transformers β†’ transformers}/tests/conftest.py
  104. 0 {pytorch_transformers β†’ transformers}/tests/fixtures/input.txt
  105. 0 {pytorch_transformers β†’ transformers}/tests/fixtures/sample_text.txt
  106. BIN {pytorch_transformers β†’ transformers}/tests/fixtures/test_sentencepiece.model
  107. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_auto_test.py
  108. +4 βˆ’4 {pytorch_transformers β†’ transformers}/tests/modeling_bert_test.py
  109. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_common_test.py
  110. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_distilbert_test.py
  111. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_gpt2_test.py
  112. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_openai_test.py
  113. +4 βˆ’4 {pytorch_transformers β†’ transformers}/tests/modeling_roberta_test.py
  114. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_auto_test.py
  115. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_bert_test.py
  116. +7 βˆ’7 {pytorch_transformers β†’ transformers}/tests/modeling_tf_common_test.py
  117. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_distilbert_test.py
  118. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_gpt2_test.py
  119. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_openai_gpt_test.py
  120. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_roberta_test.py
  121. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_transfo_xl_test.py
  122. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_xlm_test.py
  123. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tests/modeling_tf_xlnet_test.py
  124. +4 βˆ’4 {pytorch_transformers β†’ transformers}/tests/modeling_transfo_xl_test.py
  125. +4 βˆ’4 {pytorch_transformers β†’ transformers}/tests/modeling_xlm_test.py
  126. +4 βˆ’4 {pytorch_transformers β†’ transformers}/tests/modeling_xlnet_test.py
  127. +2 βˆ’2 {pytorch_transformers β†’ transformers}/tests/optimization_test.py
  128. +2 βˆ’2 {pytorch_transformers β†’ transformers}/tests/tokenization_auto_test.py
  129. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tests/tokenization_bert_test.py
  130. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tests/tokenization_distilbert_test.py
  131. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tests/tokenization_gpt2_test.py
  132. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tests/tokenization_openai_test.py
  133. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tests/tokenization_roberta_test.py
  134. 0 {pytorch_transformers β†’ transformers}/tests/tokenization_tests_commons.py
  135. +2 βˆ’2 {pytorch_transformers β†’ transformers}/tests/tokenization_transfo_xl_test.py
  136. +2 βˆ’2 {pytorch_transformers β†’ transformers}/tests/tokenization_utils_test.py
  137. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tests/tokenization_xlm_test.py
  138. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tests/tokenization_xlnet_test.py
  139. +3 βˆ’3 {pytorch_transformers β†’ transformers}/tokenization_auto.py
  140. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tokenization_bert.py
  141. +1 βˆ’1 {pytorch_transformers β†’ transformers}/tokenization_distilbert.py
  142. 0 {pytorch_transformers β†’ transformers}/tokenization_gpt2.py
  143. 0 {pytorch_transformers β†’ transformers}/tokenization_openai.py
  144. 0 {pytorch_transformers β†’ transformers}/tokenization_roberta.py
  145. 0 {pytorch_transformers β†’ transformers}/tokenization_transfo_xl.py
  146. +6 βˆ’6 {pytorch_transformers β†’ transformers}/tokenization_utils.py
  147. 0 {pytorch_transformers β†’ transformers}/tokenization_xlm.py
  148. 0 {pytorch_transformers β†’ transformers}/tokenization_xlnet.py
@@ -1,7 +1,7 @@
version: 2
jobs:
build_py3_torch_and_tf:
working_directory: ~/pytorch-transformers
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
resource_class: xlarge
@@ -13,10 +13,10 @@ jobs:
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
- run: python -m pytest -sv ./transformers/tests/ --cov
- run: codecov
build_py3_torch:
working_directory: ~/pytorch-transformers
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
resource_class: xlarge
@@ -27,11 +27,11 @@ jobs:
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
- run: python -m pytest -sv ./transformers/tests/ --cov
- run: python -m pytest -sv ./examples/
- run: codecov
build_py3_tf:
working_directory: ~/pytorch-transformers
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
resource_class: xlarge
@@ -42,10 +42,10 @@ jobs:
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
- run: python -m pytest -sv ./transformers/tests/ --cov
- run: codecov
build_py2_torch:
working_directory: ~/pytorch-transformers
working_directory: ~/transformers
resource_class: large
parallelism: 1
docker:
@@ -55,10 +55,10 @@ jobs:
- run: sudo pip install torch
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
- run: python -m pytest -sv ./transformers/tests/ --cov
- run: codecov
build_py2_tf:
working_directory: ~/pytorch-transformers
working_directory: ~/transformers
resource_class: large
parallelism: 1
docker:
@@ -68,10 +68,10 @@ jobs:
- run: sudo pip install tensorflow==2.0.0-rc0
- run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov
- run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
- run: python -m pytest -sv ./transformers/tests/ --cov
- run: codecov
deploy_doc:
working_directory: ~/pytorch-transformers
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
steps:
@@ -1,5 +1,5 @@
[run]
source=pytorch_transformers
source=transformers
omit =
# skip conversion scripts from testing for now
*/convert_*
@@ -1,6 +1,6 @@
---
name: "\U0001F4DA Migration from PyTorch-pretrained-Bert"
about: Report a problem when migrating from PyTorch-pretrained-Bert to PyTorch-Transformers
about: Report a problem when migrating from PyTorch-pretrained-Bert to Transformers
---

## πŸ“š Migration
@@ -1,8 +1,8 @@
# πŸ‘Ύ PyTorch-Transformers
# πŸ€— Transformers

[![CircleCI](https://circleci.com/gh/huggingface/pytorch-transformers.svg?style=svg)](https://circleci.com/gh/huggingface/pytorch-transformers)
[![CircleCI](https://circleci.com/gh/huggingface/transformers.svg?style=svg)](https://circleci.com/gh/huggingface/transformers)

PyTorch-Transformers (formerly known as `pytorch-pretrained-bert`) is a library of state-of-the-art pre-trained models for Natural Language Processing (NLP).
Transformers (formerly known as `pytorch-pretrained-bert`) is a library of state-of-the-art pre-trained models for Natural Language Processing (NLP).

The library currently contains PyTorch implementations, pre-trained model weights, usage scripts and conversion utilities for the following models:

@@ -13,30 +13,30 @@ The library currently contains PyTorch implementations, pre-trained model weight
5. **[XLNet](https://github.com/zihangdai/xlnet/)** (from Google/CMU) released with the paper [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) by Zhilin Yang*, Zihang Dai*, Yiming Yang, Jaime Carbonell, Ruslan Salakhutdinov, Quoc V. Le.
6. **[XLM](https://github.com/facebookresearch/XLM/)** (from Facebook) released together with the paper [Cross-lingual Language Model Pretraining](https://arxiv.org/abs/1901.07291) by Guillaume Lample and Alexis Conneau.
7. **[RoBERTa](https://github.com/pytorch/fairseq/tree/master/examples/roberta)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://arxiv.org/abs/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.
8. **[DistilBERT](https://github.com/huggingface/pytorch-transformers/tree/master/examples/distillation)** (from HuggingFace), released together with the blogpost [Smaller, faster, cheaper, lighter: Introducing DistilBERT, a distilled version of BERT](https://medium.com/huggingface/distilbert-8cf3380435b5) by Victor Sanh, Lysandre Debut and Thomas Wolf.
8. **[DistilBERT](https://github.com/huggingface/transformers/tree/master/examples/distillation)** (from HuggingFace), released together with the blogpost [Smaller, faster, cheaper, lighter: Introducing DistilBERT, a distilled version of BERT](https://medium.com/huggingface/distilbert-8cf3380435b5) by Victor Sanh, Lysandre Debut and Thomas Wolf.

These implementations have been tested on several datasets (see the example scripts) and should match the performance of the original implementations (e.g. ~93 F1 on SQuAD for BERT Whole-Word-Masking, ~88 F1 on RocStories for OpenAI GPT, ~18.3 perplexity on WikiText 103 for Transformer-XL, ~0.916 Pearson R coefficient on STS-B for XLNet). You can find more details on the performance in the Examples section of the [documentation](https://huggingface.co/pytorch-transformers/examples.html).
These implementations have been tested on several datasets (see the example scripts) and should match the performance of the original implementations (e.g. ~93 F1 on SQuAD for BERT Whole-Word-Masking, ~88 F1 on RocStories for OpenAI GPT, ~18.3 perplexity on WikiText 103 for Transformer-XL, ~0.916 Pearson R coefficient on STS-B for XLNet). You can find more details on the performance in the Examples section of the [documentation](https://huggingface.co/transformers/examples.html).

| Section | Description |
|-|-|
| [Installation](#installation) | How to install the package |
| [Online demo](#online-demo) | Experimenting with this repo's text generation capabilities |
| [Quick tour: Usage](#quick-tour) | Tokenizers & models usage: Bert and GPT-2 |
| [Quick tour: Fine-tuning/usage scripts](#quick-tour-of-the-fine-tuningusage-scripts) | Using provided scripts: GLUE, SQuAD and Text generation |
| [Migrating from pytorch-pretrained-bert to pytorch-transformers](#Migrating-from-pytorch-pretrained-bert-to-pytorch-transformers) | Migrating your code from pytorch-pretrained-bert to pytorch-transformers |
| [Documentation](https://huggingface.co/pytorch-transformers/) | Full API documentation and more |
| [Migrating from pytorch-pretrained-bert to transformers](#Migrating-from-pytorch-pretrained-bert-to-transformers) | Migrating your code from pytorch-pretrained-bert to transformers |
| [Documentation](https://huggingface.co/transformers/) | Full API documentation and more |

## Installation

This repo is tested on Python 2.7 and 3.5+ (examples are tested only on python 3.5+) and PyTorch 1.0.0+

### With pip

PyTorch-Transformers can be installed by pip as follows:
Transformers can be installed by pip as follows:

```bash
pip install pytorch-transformers
pip install transformers
```

### From source
@@ -49,14 +49,14 @@ pip install [--editable] .

### Tests

A series of tests is included for the library and the example scripts. Library tests can be found in the [tests folder](https://github.com/huggingface/pytorch-transformers/tree/master/pytorch_transformers/tests) and examples tests in the [examples folder](https://github.com/huggingface/pytorch-transformers/tree/master/examples).
A series of tests is included for the library and the example scripts. Library tests can be found in the [tests folder](https://github.com/huggingface/transformers/tree/master/transformers/tests) and examples tests in the [examples folder](https://github.com/huggingface/transformers/tree/master/examples).

These tests can be run using `pytest` (install pytest if needed with `pip install pytest`).

You can run the tests from the root of the cloned repository with the commands:

```bash
python -m pytest -sv ./pytorch_transformers/tests/
python -m pytest -sv ./transformers/tests/
python -m pytest -sv ./examples/
```

@@ -80,13 +80,13 @@ You can use it to experiment with completions generated by `GPT2Model`, `Transfo

## Quick tour

Let's do a very quick overview of PyTorch-Transformers. Detailed examples for each model architecture (Bert, GPT, GPT-2, Transformer-XL, XLNet and XLM) can be found in the [full documentation](https://huggingface.co/pytorch-transformers/).
Let's do a very quick overview of Transformers. Detailed examples for each model architecture (Bert, GPT, GPT-2, Transformer-XL, XLNet and XLM) can be found in the [full documentation](https://huggingface.co/transformers/).

```python
import torch
from pytorch_transformers import *
from transformers import *
# PyTorch-Transformers has a unified API
# Transformers has a unified API
# for 7 transformer architectures and 30 pretrained weights.
# Model | Tokenizer | Pretrained weights shortcut
MODELS = [(BertModel, BertTokenizer, 'bert-base-uncased'),
@@ -299,19 +299,19 @@ python ./examples/run_generation.py \
--model_name_or_path=gpt2 \
```

## Migrating from pytorch-pretrained-bert to pytorch-transformers
## Migrating from pytorch-pretrained-bert to transformers

Here is a quick summary of what you should take care of when migrating from `pytorch-pretrained-bert` to `pytorch-transformers`.
Here is a quick summary of what you should take care of when migrating from `pytorch-pretrained-bert` to `transformers`.

### Models always output `tuples`

The main breaking change when migrating from `pytorch-pretrained-bert` to `pytorch-transformers` is that the models' forward method always outputs a `tuple` with various elements depending on the model and the configuration parameters.
The main breaking change when migrating from `pytorch-pretrained-bert` to `transformers` is that the models' forward method always outputs a `tuple` with various elements depending on the model and the configuration parameters.

The exact content of the tuples for each model is detailed in the models' docstrings and the [documentation](https://huggingface.co/pytorch-transformers/).
The exact content of the tuples for each model is detailed in the models' docstrings and the [documentation](https://huggingface.co/transformers/).

In pretty much every case, you will be fine by taking the first element of the output as the output you previously used in `pytorch-pretrained-bert`.

Here is a `pytorch-pretrained-bert` to `pytorch-transformers` conversion example for a `BertForSequenceClassification` classification model:
Here is a `pytorch-pretrained-bert` to `transformers` conversion example for a `BertForSequenceClassification` classification model:

```python
# Let's load our model
@@ -320,11 +320,11 @@ model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
# If you used to have this line in pytorch-pretrained-bert:
loss = model(input_ids, labels=labels)
# Now just use this line in pytorch-transformers to extract the loss from the output tuple:
# Now just use this line in transformers to extract the loss from the output tuple:
outputs = model(input_ids, labels=labels)
loss = outputs[0]
# In pytorch-transformers you can also have access to the logits:
# In transformers you can also have access to the logits:
loss, logits = outputs[:2]
# And even the attention weights if you configure the model to output them (and other outputs too, see the docstrings and documentation)
@@ -339,7 +339,7 @@ Breaking change in the `from_pretrained()` method:

1. Models are now set in evaluation mode by default when instantiated with the `from_pretrained()` method. To train them, don't forget to set them back in training mode (`model.train()`) to activate the dropout modules, as illustrated in the sketch below.

2. The additional `*input` and `**kwargs` arguments supplied to the `from_pretrained()` method used to be directly passed to the underlying model's class `__init__()` method. They are now used to update the model configuration attribute instead, which can break derived model classes built on the previous `BertForSequenceClassification` examples. We are working on a way to mitigate this breaking change in [#866](https://github.com/huggingface/pytorch-transformers/pull/866) by forwarding to the model `__init__()` method (i) the provided positional arguments and (ii) the keyword arguments which do not match any configuration class attributes.
2. The additional `*input` and `**kwargs` arguments supplied to the `from_pretrained()` method used to be directly passed to the underlying model's class `__init__()` method. They are now used to update the model configuration attribute instead, which can break derived model classes built on the previous `BertForSequenceClassification` examples. We are working on a way to mitigate this breaking change in [#866](https://github.com/huggingface/transformers/pull/866) by forwarding to the model `__init__()` method (i) the provided positional arguments and (ii) the keyword arguments which do not match any configuration class attributes.

Also, while not a breaking change, the serialization methods have been standardized and you probably should switch to the new method `save_pretrained(save_directory)` if you were using any other serialization method before.
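To make the points above concrete, here is a minimal sketch (an editor's illustration, not part of this diff), assuming a `BertForSequenceClassification` model as in the example above and a hypothetical `./my_model_dir` directory:

```python
from transformers import BertForSequenceClassification

# from_pretrained() now returns the model in evaluation mode by default
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.train()  # switch back to training mode to re-activate dropout before fine-tuning

# ... fine-tuning loop goes here ...

# standardized serialization: save with save_pretrained(), reload with from_pretrained()
model.save_pretrained('./my_model_dir')  # hypothetical output directory
model = BertForSequenceClassification.from_pretrained('./my_model_dir')
```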

@@ -396,7 +396,7 @@ for batch in train_data:
loss.backward()
optimizer.step()
### In PyTorch-Transformers, optimizers and schedules are split and instantiated like this:
### In Transformers, optimizers and schedules are split and instantiated like this:
optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False) # To reproduce BertAdam specific behavior set correct_bias=False
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_total_steps) # PyTorch scheduler
### and used like this:
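### (editor's sketch, not part of this diff) roughly, the schedule is stepped once per
### optimization step inside the training loop shown above:
for batch in train_data:
    loss = model(batch)
    loss.backward()
    optimizer.step()
    scheduler.step()  # update the learning rate schedule
    optimizer.zero_grad()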
@@ -411,4 +411,4 @@ for batch in train_data:

## Citation

At the moment, there is no paper associated with PyTorch-Transformers, but we are working on preparing one. In the meantime, please include a mention of the library and a link to the present repository if you use this work in a published or open-source project.
At the moment, there is no paper associated with Transformers, but we are working on preparing one. In the meantime, please include a mention of the library and a link to the present repository if you use this work in a published or open-source project.
@@ -2,6 +2,6 @@ FROM pytorch/pytorch:latest

RUN git clone https://github.com/NVIDIA/apex.git && cd apex && python setup.py install --cuda_ext --cpp_ext

RUN pip install pytorch_transformers
RUN pip install transformers

WORKDIR /workspace
@@ -16,7 +16,7 @@ function addIcon() {
function addCustomFooter() {
const customFooter = document.createElement("div");
const questionOrIssue = document.createElement("div");
questionOrIssue.innerHTML = "Stuck? Read our <a href='https://medium.com/huggingface'>Blog posts</a> or <a href='https://github.com/huggingface/pytorch_transformers'>Create an issue</a>";
questionOrIssue.innerHTML = "Stuck? Read our <a href='https://medium.com/huggingface'>Blog posts</a> or <a href='https://github.com/huggingface/transformers'>Create an issue</a>";
customFooter.appendChild(questionOrIssue);
customFooter.classList.add("footer");

@@ -15,4 +15,4 @@ In order to help this new field develop, we have included a few additional featu
* accessing all the attention weights for each head of BERT/GPT/GPT-2,
* retrieving head output values and gradients to be able to compute head importance scores and prune heads as explained in https://arxiv.org/abs/1905.10650.

To help you understand and use these features, we have added a specific example script: `bertology.py <https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_bertology.py>`_ which extracts information from and prunes a model pre-trained on GLUE.
To help you understand and use these features, we have added a specific example script: `bertology.py <https://github.com/huggingface/transformers/blob/master/examples/run_bertology.py>`_ which extracts information from and prunes a model pre-trained on GLUE.
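As a quick illustration of the first feature above (an editor's sketch, not part of this diff, assuming the `output_attentions` configuration flag is passed through `from_pretrained()` as described in the migration notes):

```python
import torch
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# output_attentions=True updates the model configuration so attention weights are returned
model = BertModel.from_pretrained('bert-base-uncased', output_attentions=True)

input_ids = torch.tensor([tokenizer.encode("Hello world")])
with torch.no_grad():
    outputs = model(input_ids)
attentions = outputs[-1]  # one tensor per layer, each of shape (batch, num_heads, seq_len, seq_len)
```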
@@ -19,7 +19,7 @@

# -- Project information -----------------------------------------------------

project = u'pytorch-transformers'
project = u'transformers'
copyright = u'2019, huggingface'
author = u'huggingface'

@@ -109,7 +109,7 @@
# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'pytorch-transformersdoc'
htmlhelp_basename = 'transformersdoc'


# -- Options for LaTeX output ------------------------------------------------
@@ -136,7 +136,7 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'pytorch-transformers.tex', u'pytorch-transformers Documentation',
(master_doc, 'transformers.tex', u'transformers Documentation',
u'huggingface', 'manual'),
]

@@ -146,7 +146,7 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'pytorch-transformers', u'pytorch-transformers Documentation',
(master_doc, 'transformers', u'transformers Documentation',
[author], 1)
]

@@ -157,8 +157,8 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'pytorch-transformers', u'pytorch-transformers Documentation',
author, 'pytorch-transformers', 'One line description of project.',
(master_doc, 'transformers', u'transformers Documentation',
author, 'transformers', 'One line description of project.',
'Miscellaneous'),
]
