git / code.ach.gov.ru / gavrin / jupyter_snippet

commit 37166a642e810a1f909b4fd6d31939215ee30936

author Дмитрий Сергеевич Гаврин <gavrin_ds@ach.gov.ru>

date 2020-04-02 19:01:29 +0300

parents 70fe216e

browse tree at this commit

message

читает csv, читает из rar-архивов (только под windows)

files

file | add | del
ExcelPreprocessor.ipynb | +234 | -143

patch

diff --git a/ExcelPreprocessor.ipynb b/ExcelPreprocessor.ipynb
index 02ca3a3f9bf1aaa43004ab62c24fc408d7b053ce..f9bede94d38ae91ffe1c3ce61313184d35fd8ee1 100644
--- a/ExcelPreprocessor.ipynb
+++ b/ExcelPreprocessor.ipynb
@@ -70,14 +70,13 @@     "import os\n",
     "import sys\n",
     "import glob\n",
     "import re\n",
-    "from itertools import islice\n",
+    "from itertools import islice, chain\n",
     "from collections.abc import Iterable\n",
     "from urllib.parse import urlparse\n",
     "from urllib.request import urlopen\n",
     "from pathlib import Path\n",
     "\n",
-    "import pandas as pd\n",
-    "import openpyxl"
+    "import pandas as pd"
    ]
   },
   {
@@ -141,15 +140,17 @@     "        '''\n",
     "        обрабатывает файлы в списке\n",
     "        принимает обычные файлы, zip-архивы и http - пути\n",
     "        '''\n",
+    "        FN_PATTERN = '.*\\.(xls[xm]|xls)$'\n",
+    "        \n",
     "        for filename in self.wb:\n",
     "            rf = filename\n",
     "            if filename.startswith('http'):\n",
     "                import io\n",
     "                with urlopen(filename) as resp:\n",
     "                    rf = io.BytesIO(resp.read())\n",
-    "                    if filename.endswith('.xlsx'):\n",
+    "                    if re.match(FN_PATTERN, filename):\n",
     "                        yield filename, rf\n",
-    "            elif filename.endswith('xlsx'):\n",
+    "            elif re.match(FN_PATTERN, filename):\n",
     "                yield filename, open(filename, 'rb')\n",
     "\n",
     "            if filename.endswith('.zip'):\n",
@@ -159,8 +160,6 @@     "                ZIP_FILENAME_UTF8_FLAG = 0x800\n",
     "                zf = ZipFile(rf , 'r')\n",
     "                for info in zf.filelist:\n",
     "                    zfilename = info.filename\n",
-    "                    if not zfilename.endswith('.xlsx'):\n",
-    "                        continue\n",
     "                    if info.flag_bits & ZIP_FILENAME_UTF8_FLAG == 0:\n",
     "                        filename_bytes = zfilename.encode('437')\n",
     "                        guessed_encoding = chardet.detect(filename_bytes)['encoding'] or 'cp1252'\n",
@@ -170,6 +169,39 @@     "                            if not re.match(self.in_archive_re_mask, zfilename):\n",
     "                                continue\n",
     "                        yield str(Path(filename) / Path(zfilename)), zf.open(info.filename, 'r')\n",
     "\n",
+    "            if filename.endswith('.rar'):\n",
+    "                import rarfile\n",
+    "                import io\n",
+    "                from sys import platform as _platform\n",
+    "                \n",
+    "                ex = Exception('платформа не поддерживается, скачайте утилиту unrar: https://www.rarlab.com/rar_add.htm')\n",
+    "\n",
+    "                if _platform == \"linux\" or _platform == \"linux2\": # linux\n",
+    "                    raise ex\n",
+    "\n",
+    "                elif _platform == \"win32\": # Windows\n",
+    "                    rarfile.UNRAR_TOOL = 'c:\\\\dev\\\\.jupyter\\\\jupyter_snippet\\\\UnRAR.exe'\n",
+    "\n",
+    "                elif _platform == \"win64\": # Windows 64-bit\n",
+    "                    raise ex\n",
+    "                elif _platform == 'darwin':\n",
+    "                    raise ex\n",
+    "                else:\n",
+    "                    raise Exception('неизвестная платформа, поищите rar для неё и узнайте название (sys.platform)')\n",
+    "\n",
+    "                rf = rarfile.RarFile(rf, 'r')\n",
+    "                for info in rf.infolist():\n",
+    "                    if info.filename == 'README':\n",
+    "                        continue\n",
+    "                    \n",
+    "                    rfilename = info.filename\n",
+    "\n",
+    "                    if self.in_archive_re_mask:\n",
+    "                        if not re.match(self.in_archive_re_mask, rfilename):\n",
+    "                            continue\n",
+    "                    \n",
+    "                    yield str(Path(filename) / Path(rfilename)), io.BytesIO(rf.read(info.filename))\n",
+    "\n",
     "    @property\n",
     "    def _columns(self):\n",
     "        columns = []\n",
@@ -187,45 +219,91 @@     "        if self.headers:\n",
     "            columns += self.headers\n",
     "        return columns\n",
     "\n",
-    "    def _iter_over_rows(self):\n",
-    "        'итерация по общей последовательности строк в итоговом наборе'\n",
+    "    def _process_xlsx(self, file_obj):\n",
+    "        'итерация по строкам из файла типа xls[x|m]'\n",
+    "        import openpyxl\n",
     "\n",
-    "        row_counter = 0\n",
+    "        wb = openpyxl.load_workbook(file_obj, read_only=True, data_only=False, keep_links=False)\n",
+    "        if isinstance(self.iterate_over_worksheet, int):\n",
+    "            iter_sheet = [self.iterate_over_worksheet]\n",
+    "        elif isinstance(self.iterate_over_worksheet, list) or isinstance(self.iterate_over_worksheet, tuple):\n",
+    "            iter_sheet = self.iterate_over_worksheet\n",
+    "        else:\n",
+    "            iter_sheet = [i for i, ws in enumerate(wb.worksheets) if not ws.title.startswith('hidden')]\n",
     "\n",
-    "        for fn, excel_file in self._process_files:\n",
-    "            wb = openpyxl.load_workbook(excel_file, read_only=True, data_only=False, keep_links=False)\n",
+    "        for ws_index in iter_sheet:\n",
+    "            ws = wb.worksheets[ws_index]\n",
     "\n",
-    "            if isinstance(self.iterate_over_worksheet, int):\n",
-    "                iter_sheet = [self.iterate_over_worksheet]\n",
-    "            elif isinstance(self.iterate_over_worksheet, list) or isinstance(self.iterate_over_worksheet, tuple):\n",
-    "                iter_sheet = self.iterate_over_worksheet\n",
-    "            else:\n",
-    "                iter_sheet = [i for i, ws in enumerate(wb.worksheets) if not ws.title.startswith('hidden')]\n",
+    "            result = ((cell.value for cell in row) for row in ws.rows)\n",
+    "            \n",
+    "            if self.first_row_number:\n",
+    "                result = islice(result, self.first_row_number - 1, None)\n",
     "\n",
-    "            for ws_index in iter_sheet:\n",
-    "                ws = wb.worksheets[ws_index]\n",
-    "                if self.filter_empty_cells:\n",
-    "                    result = ((cell.value for cell in row) for row in ws.rows if any(cell.value for cell in row))\n",
-    "                else:\n",
-    "                    result = ((cell.value for cell in row) for row in ws.rows)\n",
+    "            if self.filter_empty_cells:\n",
+    "                result = ((val for val in row) for row in result if any(val for val in row))\n",
     "\n",
-    "                if self.first_row_number:\n",
-    "                    result = islice(result, self.first_row_number - 1, None)\n",
-    "                if self.cells:\n",
-    "                    retry_cols_values = [ws[c].value for c in self.cells.values()]\n",
-    "                    result = ((*retry_cols_values, *row) for row in result)\n",
-    "                if self.append_ws_title_column:\n",
-    "                    result = ((ws.title, *row) for row in result)\n",
-    "                if self.filename_parser:\n",
-    "                    fncols = self.filename_parser(fn)\n",
-    "                    result = ((*fncols.values(), *row) for row in result)\n",
-    "                if self.wb_index:\n",
-    "                    result = ((i, *row) for i, row in enumerate(result))\n",
-    "                if self.global_index:\n",
-    "                    result = ((row_counter, *row) for i, row in enumerate(result))\n",
-    "                for row in result:\n",
-    "                    yield row\n",
-    "                    row_counter += 1\n",
+    "            if self.cells:\n",
+    "                retry_cols_values = [ws[c].value for c in self.cells.values()]\n",
+    "                result = ((*retry_cols_values, *row) for row in result)\n",
+    "            if self.append_ws_title_column:\n",
+    "                result = ((ws.title, *row) for row in result)\n",
+    "\n",
+    "            yield from result\n",
+    "    \n",
+    "    def _process_xls(self, file_obj):\n",
+    "        'итерация по рабочим листам книг формата xls'\n",
+    "        import xlrd\n",
+    "        l = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'\n",
+    "        A_Z = [a+b for a in chain([''], l) for b in l]\n",
+    "        \n",
+    "        def addr_to_offset(addr):\n",
+    "            addr = re.match('([A-Z]+)(\\d+)', addr) # ('A', '14')\n",
+    "            return [A_Z.index(addr[0]), int(addr[1]) - 1]\n",
+    "        \n",
+    "        wb = xlrd.open_workbook(file_contents=file_obj.read(), on_demand=True)\n",
+    "\n",
+    "        if isinstance(self.iterate_over_worksheet, int):\n",
+    "            iter_sheet = [wb.sheet_by_index(self.iterate_over_worksheet)]\n",
+    "        elif isinstance(self.iterate_over_worksheet, list) or isinstance(self.iterate_over_worksheet, tuple):\n",
+    "            iter_sheet = [wb.sheet_by_index(idx) for idx in self.iterate_over_worksheet]\n",
+    "        else:\n",
+    "            iter_sheet =  [sheet for sheet in wb.sheets() if sheet.visibility==0]\n",
+    "\n",
+    "        for ws in iter_sheet:\n",
+    "            result = (ws.row_values(row_idx, start_colx=0, end_colx=None) for row_idx in range(ws.nrows))\n",
+    "            if self.first_row_number:\n",
+    "                result = islice(result, self.first_row_number - 1, None)\n",
+    "\n",
+    "            if self.filter_empty_cells:\n",
+    "                result = ((val for val in row) for row in result if any(val for val in row))\n",
+    "\n",
+    "            if self.cells:\n",
+    "                retry_cols_values = [ws.cell_value(*addr_to_offset(c)) for c in self.cells.values()]\n",
+    "                result = ((*retry_cols_values, *row) for row in result)\n",
+    "            if self.append_ws_title_column:\n",
+    "                result = ((ws.name, *row) for row in result)\n",
+    "\n",
+    "            yield from result\n",
+    "\n",
+    "    def _iter_over_rows(self):\n",
+    "        'итерация по общей последовательности строк в итоговом наборе'\n",
+    "        row_counter = 0\n",
+    "        for fn, excel_file in self._process_files:\n",
+    "            if fn.endswith('.xlsx') or fn.endswith('.xlsm'):\n",
+    "                result = self._process_xlsx(excel_file)\n",
+    "            else:\n",
+    "                result = self._process_xls(excel_file)\n",
+    "                \n",
+    "            if self.filename_parser:\n",
+    "                fncols = self.filename_parser(fn)\n",
+    "                result = ((*fncols.values(), *row) for row in result)\n",
+    "            if self.wb_index:\n",
+    "                result = ((i, *row) for i, row in enumerate(result))\n",
+    "            if self.global_index:\n",
+    "                result = ((row_counter, *row) for i, row in enumerate(result))\n",
+    "            for row in result:\n",
+    "                yield row\n",
+    "                row_counter += 1\n",
     "\n",
     "    def get_dataframe(self):\n",
     "        'возвращает набор данных в виде pandas DataFrame'\n",
@@ -1299,15 +1377,15 @@        "      <td>0</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
        "      <td>None</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>федеральным налогам и сборам</td>\n",
-       "      <td>None</td>\n",
-       "      <td>None</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>региональным налогам и сборам</td>\n",
+       "      <td>местным налогам и сборам</td>\n",
+       "      <td>налогам со специальным налоговым режимом</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -1315,13 +1393,13 @@        "      <td>1</td>\n",
        "      <td>1</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
+       "      <td>из них</td>\n",
+       "      <td>None</td>\n",
        "      <td>None</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Всего</td>\n",
-       "      <td>из них</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
+       "      <td>NaN</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
@@ -1333,15 +1411,15 @@        "      <td>2</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
        "      <td>None</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>None</td>\n",
-       "      <td>налог на прибыль организаций</td>\n",
-       "      <td>None</td>\n",
        "      <td>налог на добавленную стоимость</td>\n",
        "      <td>из графы 5 налог на добавленную стоимость по т...</td>\n",
        "      <td>платежи за пользование природными ресурсами</td>\n",
        "      <td>из графы 7\\n налог на добычу полезных ископаемых</td>\n",
        "      <td>остальные федеральные налоги и сборы</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1349,13 +1427,13 @@        "      <td>3</td>\n",
        "      <td>3</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
+       "      <td>в том числе в федеральный бюджет</td>\n",
+       "      <td>None</td>\n",
        "      <td>None</td>\n",
-       "      <td>NaN</td>\n",
        "      <td>None</td>\n",
-       "      <td>Всего</td>\n",
-       "      <td>в том числе в федеральный бюджет</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
+       "      <td>NaN</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
        "      <td>None</td>\n",
@@ -1366,16 +1444,16 @@        "      <td>4</td>\n",
        "      <td>4</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
-       "      <td>А</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
        "      <td>2</td>\n",
        "      <td>3</td>\n",
        "      <td>4</td>\n",
        "      <td>5</td>\n",
        "      <td>6</td>\n",
-       "      <td>7</td>\n",
+       "      <td>7.0</td>\n",
        "      <td>8</td>\n",
        "      <td>9</td>\n",
+       "      <td>10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -1400,16 +1478,16 @@        "      <td>95</td>\n",
        "      <td>95</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
-       "      <td>Камчатский край</td>\n",
-       "      <td>3060920.0</td>\n",
+       "      <td>3060920</td>\n",
        "      <td>1273911</td>\n",
        "      <td>76849</td>\n",
        "      <td>9384</td>\n",
        "      <td>989177</td>\n",
        "      <td>989065</td>\n",
-       "      <td>16638</td>\n",
+       "      <td>16638.0</td>\n",
        "      <td>877</td>\n",
        "      <td>191247</td>\n",
+       "      <td>525892</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>96</th>\n",
@@ -1417,16 +1495,16 @@        "      <td>96</td>\n",
        "      <td>96</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
-       "      <td>Магаданская область</td>\n",
-       "      <td>1118695.0</td>\n",
+       "      <td>1118695</td>\n",
        "      <td>531438</td>\n",
        "      <td>121488</td>\n",
        "      <td>13819</td>\n",
        "      <td>249470</td>\n",
        "      <td>249469</td>\n",
-       "      <td>31627</td>\n",
+       "      <td>31627.0</td>\n",
        "      <td>29256</td>\n",
        "      <td>128853</td>\n",
+       "      <td>93277</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>97</th>\n",
@@ -1434,16 +1512,16 @@        "      <td>97</td>\n",
        "      <td>97</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
-       "      <td>Сахалинская область</td>\n",
-       "      <td>4404357.0</td>\n",
+       "      <td>4404357</td>\n",
        "      <td>2091467</td>\n",
        "      <td>458202</td>\n",
        "      <td>72443</td>\n",
        "      <td>1110375</td>\n",
        "      <td>1110306</td>\n",
-       "      <td>20647</td>\n",
+       "      <td>20647.0</td>\n",
        "      <td>4214</td>\n",
        "      <td>502243</td>\n",
+       "      <td>606823</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>98</th>\n",
@@ -1451,16 +1529,16 @@        "      <td>98</td>\n",
        "      <td>98</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
-       "      <td>Еврейская автономная область</td>\n",
-       "      <td>540981.0</td>\n",
+       "      <td>540981</td>\n",
        "      <td>120118</td>\n",
        "      <td>14430</td>\n",
        "      <td>1406</td>\n",
        "      <td>58193</td>\n",
        "      <td>58037</td>\n",
-       "      <td>2554</td>\n",
+       "      <td>2554.0</td>\n",
        "      <td>59</td>\n",
        "      <td>44941</td>\n",
+       "      <td>81699</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>99</th>\n",
@@ -1468,16 +1546,16 @@        "      <td>99</td>\n",
        "      <td>99</td>\n",
        "      <td>https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...</td>\n",
        "      <td>1005</td>\n",
-       "      <td>Чукотский АО</td>\n",
-       "      <td>61052.0</td>\n",
+       "      <td>61052</td>\n",
        "      <td>44624</td>\n",
        "      <td>10852</td>\n",
        "      <td>1076</td>\n",
        "      <td>25804</td>\n",
        "      <td>25804</td>\n",
-       "      <td>1336</td>\n",
+       "      <td>1336.0</td>\n",
        "      <td>857</td>\n",
        "      <td>6632</td>\n",
+       "      <td>4049</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1498,96 +1576,109 @@        "97            97        97  https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...   \n",
        "98            98        98  https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...   \n",
        "99            99        99  https:\\www.nalog.ru\\html\\sites\\www.new.nalog.r...   \n",
        "\n",
-       "   ws_title                          поле  код строки  \\\n",
-       "0      1005                          None         NaN   \n",
-       "1      1005                          None         NaN   \n",
-       "2      1005                          None         NaN   \n",
-       "3      1005                          None         NaN   \n",
-       "4      1005                             А         1.0   \n",
-       "..      ...                           ...         ...   \n",
-       "95     1005               Камчатский край   3060920.0   \n",
-       "96     1005           Магаданская область   1118695.0   \n",
-       "97     1005           Сахалинская область   4404357.0   \n",
-       "98     1005  Еврейская автономная область    540981.0   \n",
-       "99     1005                  Чукотский АО     61052.0   \n",
+       "   ws_title                              поле  \\\n",
+       "0      1005                              None   \n",
+       "1      1005                            из них   \n",
+       "2      1005                              None   \n",
+       "3      1005  в том числе в федеральный бюджет   \n",
+       "4      1005                                 1   \n",
+       "..      ...                               ...   \n",
+       "95     1005                           3060920   \n",
+       "96     1005                           1118695   \n",
+       "97     1005                           4404357   \n",
+       "98     1005                            540981   \n",
+       "99     1005                             61052   \n",
        "\n",
-       "         кол. налогоплательщиков                         всего  \\\n",
-       "0   федеральным налогам и сборам                          None   \n",
-       "1                         Всего                         из них   \n",
-       "2                           None  налог на прибыль организаций   \n",
-       "3                           None                        Всего    \n",
-       "4                              2                             3   \n",
-       "..                           ...                           ...   \n",
-       "95                       1273911                         76849   \n",
-       "96                        531438                        121488   \n",
-       "97                       2091467                        458202   \n",
-       "98                        120118                         14430   \n",
-       "99                         44624                         10852   \n",
+       "                         код строки  \\\n",
+       "0                              None   \n",
+       "1                              None   \n",
+       "2   налог на добавленную стоимость    \n",
+       "3                              None   \n",
+       "4                                 2   \n",
+       "..                              ...   \n",
+       "95                          1273911   \n",
+       "96                           531438   \n",
+       "97                          2091467   \n",
+       "98                           120118   \n",
+       "99                            44624   \n",
        "\n",
-       "                   по налогу (сбору)                          по пени  \\\n",
-       "0                               None                             None   \n",
-       "1                               None                             None   \n",
-       "2                               None  налог на добавленную стоимость    \n",
-       "3   в том числе в федеральный бюджет                             None   \n",
-       "4                                  4                                5   \n",
-       "..                               ...                              ...   \n",
-       "95                              9384                           989177   \n",
-       "96                             13819                           249470   \n",
-       "97                             72443                          1110375   \n",
-       "98                              1406                            58193   \n",
-       "99                              1076                            25804   \n",
-       "\n",
-       "                                           по штрафам  \\\n",
+       "                              кол. налогоплательщиков  \\\n",
        "0                                                None   \n",
        "1                                                None   \n",
        "2   из графы 5 налог на добавленную стоимость по т...   \n",
        "3                                                None   \n",
-       "4                                                   6   \n",
+       "4                                                   3   \n",
        "..                                                ...   \n",
-       "95                                             989065   \n",
-       "96                                             249469   \n",
-       "97                                            1110306   \n",
-       "98                                              58037   \n",
-       "99                                              25804   \n",
+       "95                                              76849   \n",
+       "96                                             121488   \n",
+       "97                                             458202   \n",
+       "98                                              14430   \n",
+       "99                                              10852   \n",
        "\n",
-       "                                        проценты  \\\n",
+       "                                           всего  \\\n",
        "0                                           None   \n",
        "1                                           None   \n",
        "2   платежи за пользование природными ресурсами    \n",
        "3                                           None   \n",
-       "4                                              7   \n",
+       "4                                              4   \n",
        "..                                           ...   \n",
-       "95                                         16638   \n",
-       "96                                         31627   \n",
-       "97                                         20647   \n",
-       "98                                          2554   \n",
-       "99                                          1336   \n",
+       "95                                          9384   \n",
+       "96                                         13819   \n",
+       "97                                         72443   \n",
+       "98                                          1406   \n",
+       "99                                          1076   \n",
        "\n",
-       "                                              по ЕСН  \\\n",
+       "                                   по налогу (сбору)  \\\n",
        "0                                               None   \n",
        "1                                               None   \n",
        "2   из графы 7\\n налог на добычу полезных ископаемых   \n",
        "3                                               None   \n",
-       "4                                                  8   \n",
+       "4                                                  5   \n",
        "..                                               ...   \n",
-       "95                                               877   \n",
-       "96                                             29256   \n",
-       "97                                              4214   \n",
-       "98                                                59   \n",
-       "99                                               857   \n",
+       "95                                            989177   \n",
+       "96                                            249470   \n",
+       "97                                           1110375   \n",
+       "98                                             58193   \n",
+       "99                                             25804   \n",
        "\n",
-       "                    по страховым взносам  \n",
-       "0                                   None  \n",
-       "1                                   None  \n",
-       "2   остальные федеральные налоги и сборы  \n",
-       "3                                   None  \n",
-       "4                                      9  \n",
-       "..                                   ...  \n",
-       "95                                191247  \n",
-       "96                                128853  \n",
-       "97                                502243  \n",
-       "98                                 44941  \n",
-       "99                                  6632  \n",
+       "                                 по пени  по штрафам  \\\n",
+       "0                                   None         NaN   \n",
+       "1                                   None         NaN   \n",
+       "2   остальные федеральные налоги и сборы         NaN   \n",
+       "3                                   None         NaN   \n",
+       "4                                      6         7.0   \n",
+       "..                                   ...         ...   \n",
+       "95                                989065     16638.0   \n",
+       "96                                249469     31627.0   \n",
+       "97                               1110306     20647.0   \n",
+       "98                                 58037      2554.0   \n",
+       "99                                 25804      1336.0   \n",
+       "\n",
+       "                         проценты                    по ЕСН  \\\n",
+       "0   региональным налогам и сборам  местным налогам и сборам   \n",
+       "1                            None                      None   \n",
+       "2                            None                      None   \n",
+       "3                            None                      None   \n",
+       "4                               8                         9   \n",
+       "..                            ...                       ...   \n",
+       "95                            877                    191247   \n",
+       "96                          29256                    128853   \n",
+       "97                           4214                    502243   \n",
+       "98                             59                     44941   \n",
+       "99                            857                      6632   \n",
+       "\n",
+       "                        по страховым взносам  \n",
+       "0   налогам со специальным налоговым режимом  \n",
+       "1                                       None  \n",
+       "2                                       None  \n",
+       "3                                       None  \n",
+       "4                                         10  \n",
+       "..                                       ...  \n",
+       "95                                    525892  \n",
+       "96                                     93277  \n",
+       "97                                    606823  \n",
+       "98                                     81699  \n",
+       "99                                      4049  \n",
        "\n",
        "[100 rows x 14 columns]"
       ]
diff --git a/UnRAR.exe b/UnRAR.exe
new file mode 100644
index 0000000000000000000000000000000000000000..01a2b7e64c8b1a7158e1775f0ffbb05370787aa3
Binary files /dev/null and b/UnRAR.exe differ