Source code for tweebo_parser.api

'''
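Module contains the following classes:

1. API -- Client used to send texts to a TweeboParser API server.
2. ServerError -- Exception raised when the API server cannot be reached.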
'''

from pathlib import Path
import json
import tempfile
from typing import List, Dict, Union

import requests


class API(object):
    '''
    Allows easy connection and requests to the TweeboParse API server.
    TweeboParse is a Twitter specific dependency parser.

    Attributes:

    1. hostname -- The IP address of the TweeboParser API server.
    2. port -- The Port that the TweeboParser API server is attached to.
    3. retries -- Number of times to retry json decoding the returned data.
    4. log_errors -- Whether to log errors or not. If this is True it logs
       errors under `tweebo_log` file within your temp_dir

    .. automethod:: __init__
    '''

    def __init__(self, hostname: str = '0.0.0.0', port: int = 8000,
                 retries: int = 10, log_errors: bool = False) -> None:
        '''
        :param hostname: The IP address of the TweeboParser API server.
        :param port: The Port that the TweeboParser API server is attached to.
        :param retries: Number of times to retry json decoding the returned
                        data.
        :param log_errors: Whether undecodable responses are appended to the
                           `tweebo_log` file in the system temp directory.
        '''
        self.hostname = hostname
        self.port = port
        self.retries = retries
        self.log_errors = log_errors
        self._log_fp = Path(tempfile.gettempdir(), 'tweebo_log')
        # Start from an empty log: a file left over from an earlier instance
        # is truncated (this happens whether or not log_errors is set).
        if self._log_fp.is_file():
            self._log_fp.write_text('')

    def log_error(self, text: str) -> None:
        '''
        Given some error text it will log the text if self.log_errors is True

        :param text: Error text to log
        '''
        if not self.log_errors:
            return
        with self._log_fp.open('a+') as log_file:
            log_file.write(f'{text}\n')

    def _parse(self, texts: List[str], output_type: str,
               retry_count: int = 0) -> list:
        '''
        Posts the texts to the server and returns the decoded json reply.
        Shared by :py:meth:`parse_conll` and :py:meth:`parse_stanford` so
        that both handle connection and decoding errors identically.

        :param texts: The List of Strings to be processed by TweeboParse.
        :param output_type: Value sent as `output_type` in the request,
                            either `conll` or `stanford`.
        :param retry_count: Number of attempts that are counted as already
                            made.
        :return: The json decoded body of the server response.
        :raises ServerError: Caused when the server cannot be reached.
        :raises :py:class:`requests.exceptions.HTTPError`: Caused when the
                server answers with an error status code.
        :raises Exception: Caused if the response still cannot be json
                           decoded once self.retries retries are used up.
        '''
        post_data = {'texts': texts, 'output_type': output_type}
        url = f'http://{self.hostname}:{self.port}'
        attempt = retry_count
        # Iterative retry: every pass re-sends the request. Using a loop
        # (rather than recursion) with `>=` guarantees termination even when
        # the caller passes a retry_count that is already above self.retries.
        while True:
            try:
                response = requests.post(url, json=post_data,
                                         headers={'Connection': 'close'})
                # An HTTPError raised here is deliberately not caught: it
                # propagates to the caller unchanged.
                response.raise_for_status()
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout,
                    requests.exceptions.InvalidSchema) as server_error:
                raise ServerError(server_error, self.hostname,
                                  self.port) from server_error
            try:
                return response.json()
            except ValueError as json_error:
                # ValueError is the common base class of the json decoding
                # errors, so this also covers json.JSONDecodeError.
                if attempt >= self.retries:
                    self.log_error(response.text)
                    raise Exception('Json Decoding error cannot parse this '
                                    f':\n{response.text}') from json_error
                attempt += 1

    def parse_conll(self, texts: List[str],
                    retry_count: int = 0) -> List[str]:
        '''
        Processes the texts using TweeboParse and returns them in CoNLL
        format.

        :param texts: The List of Strings to be processed by TweeboParse.
        :param retry_count: The number of times it has retried for. Default
                            0 does not require setting.
        :return: A list of CoNLL formated strings.
        :raises ServerError: Caused when the server is not running.
        :raises :py:class:`requests.exceptions.HTTPError`: Caused when the
                input texts is not formated correctly e.g. When you give it
                a String not a list of Strings.
        :raises Exception: Caused if after self.retries attempts to parse
                           the data it cannot decode the data.

        :Example:

        ::

            from tweebo_parser import API
            tweebo_api = API()
            text_data = ['hello how are you', 'Where are we going']
            result = tweebo_api.parse_conll(text_data)
            print(result)
        '''
        return self._parse(texts, 'conll', retry_count)

    def parse_stanford(self, texts: List[str], retry_count: int = 0
                       ) -> List[Dict[str, Union[str, int]]]:
        '''
        Processes the texts using TweeboParse and returns them in a Stanford
        styled format (as in the same format as the json return of the
        Stanford CoreNLP server dependency parser).

        :param texts: The List of Strings to be processed by TweeboParse.
        :param retry_count: The number of times it has retried for. Default
                            0 does not require setting.
        :return: A list of dicts.
        :raises ServerError: Caused when the server is not running.
        :raises :py:class:`requests.exceptions.HTTPError`: Caused when the
                input texts is not formated correctly e.g. When you give it
                a String not a list of Strings.
        :raises Exception: Caused if after self.retries attempts to parse
                           the data it cannot decode the data.

        :Example:

        ::

            from tweebo_parser import API
            tweebo_api = API()
            text_data = ['hello how are you', 'Where are we going']
            result = tweebo_api.parse_stanford(text_data)
            print(result)
        '''
        return self._parse(texts, 'stanford', retry_count)
class ServerError(Exception):
    '''
    Exception raised when the Server API is not available.

    Attributes:

    1. message -- Explains why it could not connect to the server, and
       details of the server it tried to connect to.

    .. automethod:: __init__
    '''

    def __init__(self, excpetion: requests.exceptions.RequestException,
                 hostname: str, port: int) -> None:
        '''
        :param excpetion: The requests exception instance that is raised.
                          The misspelt parameter name is kept so that
                          existing keyword callers continue to work.
        :param hostname: The IP address of the API server.
        :param port: The Port that the API server is attached to.
        '''
        # Generic fallback, used when the exception is neither of the two
        # kinds recognised below.
        message = f'Cannot connect to the server at {hostname}:{port}'
        # Timeout is tested before ConnectionError, so an exception that is
        # an instance of both is reported as a time out.
        if isinstance(excpetion, requests.exceptions.Timeout):
            message = 'Error caused by Time out. This is most likely due to '\
                      f'the server not running at: {hostname}:{port}'
        elif isinstance(excpetion, requests.exceptions.ConnectionError):
            message = 'Error caused by Connection Error. This is most likely '\
                      f'due to the server not running at {hostname}:{port}'
        self.message = message

    def __str__(self) -> str:
        '''
        :return: The explanatory message. Without this override the string
                 form of the exception would be built from the raw
                 constructor arguments instead of `message`.
        '''
        return self.message