python – Designing an URL Shortener

Recently I was assigned a URL shortener design problem while interviewing with a well known organization. Here is the exact problem statement:

  1. Build a simple URL shortener service that will accept a URL as an argument over a REST API and return a shortened URL as a result.

  2. The URL and shortened URL should be stored in memory by applicaon.

    a. (BONUS) Instead of in memory, store these things in a text file.

  3. If I again ask for the same URL, it should give me the same URL as it gave before instead of generating a new one.

  4. (BONUS) Put this application in a Docker image by writing a Dockerfile and provide the docker image link along with the source code link.

The problem looked simpler and I did all bonus points as well but still didn’t hear back from them (not even the feedback).

Here is my approach:

  • As they have asked for multiple storage mechanisms, I created a factory for key-value stores and implemented memory and file based stores.
  • I could think of two approaches for shortening the URLs: 1) Hashing 2) Base 62 of a counter. Base 62 seemed proper approach but they have added a requirement that for same long URL, the same shortened URL should be returned. As Base 62 works with an autoincrement counter, achieving this requirement without extra memory/storage is not possible. So I went with hashing the long URLs (I know there are chances of collisions, I mentioned this as a trade-off).

Here is my base key-value store:

kvstore_base.py

from abc import ABC, abstractmethod


class KVStoreBase(ABC):
    """
    Base key-value store class that provides abstract methods required for
    any storage mechanism used to store shortened URLs.
    """

    def __init__(self, store):
        """
        Constructor.

        :param store: Object of any storage mechanism.
        """
        self.store = store

    @abstractmethod
    def __getitem__(self, key: str) -> str:
        pass

    @abstractmethod
    def __setitem__(self, key: str, value: str) -> None:
        pass

    @abstractmethod
    def __contains__(self, key: str) -> bool:
        pass

file_store.py

import io

from .kvstore_base import KVStoreBase


class FileStore(KVStoreBase):
    """
    Memory store, which uses 'dict' data structure to
    store the key-value pairs.
    """

    def __init__(self):
        super().__init__(open("db.txt", "a+"))

    def __getitem__(self, key: str) -> str:
        """
        Get value for the given key from dict
        :param key: Key for which the value is to be retrieved.
        :return: value
        """
        # iterate over file and search for given key.
        try:
            # move pointer to initial position in file
            self.store.seek(0, io.SEEK_SET)
            for line in self.store:
                suffix, long_url = line.split()
                if suffix == key:
                    return long_url
        except Exception as err:
            print(str(err))
        return ""

    def __setitem__(self, key: str, value: str) -> None:
        """
        Set value for the given key into dict.
        :param key: Key to be added.
        :param value: Value corresponding to the key.
        :return: None
        """
        # move pointer to the end of the file for writing.
        self.store.seek(0, io.SEEK_END)
        if self.store.tell() != 0:
            self.store.write(f"n{key} {value}")
        else:
            self.store.write(f"{key} {value}")
        self.store.flush()

    def __contains__(self, key: str) -> bool:
        """
        Check whether the given key is present in the dict.
        :param key: Key whose presence is to be checked.
        :return: True if key is present, False otherwise.
        """
        return True if self.__getitem__(key) else False

    def __del__(self) -> None:
        """
        Free file resource.
        """
        self.store.close()

kvstore_factory.py

from .memory_store import MemoryStore
from .file_store import FileStore
from .kvstore_base import KVStoreBase


class Factory:
    """
    KV Store factory to instantiate storage objects.
    """

    kvstore_map: dict = {"MEMORY": MemoryStore, "FILE": FileStore}

    @staticmethod
    def get_instance(instance_type: str) -> KVStoreBase:
        """
        Instantiate given KV store class dynamically.
        :param instance_type: Type of KV store to be instantiated.
        :return: KV store object.
        """
        try:
            return Factory.kvstore_map(instance_type)()
        except KeyError:
            raise Exception("Invalid instance requested.")

I used FastAPI to create endpoints, here is the two main routes (one for shortening the URL and one for redirecting):

routes/urls.py

import hashlib
from urllib.parse import urlparse

from fastapi import APIRouter, HTTPException, Request

from ..utils.store_connector import StoreConnector
from ..models.urls import Url
from starlette.responses import RedirectResponse

router = APIRouter()
store = StoreConnector().store


def _get_base_url(endpoint_url: str) -> str:
    """
    Extract base url from any endpoint URL.
    :param endpoint_url: Endpoint URL from which the base URL is to be extracted.
    :return: Base URL
    """
    return f"{urlparse(endpoint_url).scheme}://{urlparse(endpoint_url).hostname}:{urlparse(endpoint_url).port}"


@router.post("/shorten", tags=("URLs"))
async def shorten(url_obj: Url, request: Request) -> Url:
    """
    Shorten the given long URL.
    :param request: request object
    :param url_obj: URL object
    :return: shortened URL.
    """
    suffix = hashlib.sha256(url_obj.url.encode("utf-8")).hexdigest()(:8)
    if suffix not in store:
        # store short-url-suffix: long-url into data store.
        store(suffix) = url_obj.url

    return Url(url=f"{_get_base_url(request.url_for('shorten'))}/{suffix}")


@router.get("/{suffix}", tags=("URLs"))
async def redirect(suffix: str) -> RedirectResponse:
    """
    Redirect to long URL for the given URL ID.
    :param suffix: URL ID for the corresponding long URL.
    :return: Long URL.
    """
    long_url = store(suffix)
    if long_url:
        # return permanent redirect so that browsers store this in their cache.
        response = RedirectResponse(url=long_url, status_code=301)
        return response
    raise HTTPException(status_code=404, detail="Short URL not found.")

I exposed the storage mechanism to docker environment variable and created a storage connector class, which calls the factory based on the user configuration.

store_connector.py

import os
import sys
from sys import stderr

from .singleton import Singleton
from ..kvstores.kvstore_factory import Factory
from ..kvstores.kvstore_base import KVStoreBase


class StoreConnector(metaclass=Singleton):
    """
    Key Value store singleton class that can be used across all modules.
    """

    def __init__(self):
        try:
            store_type = os.environ.get("STORE_TYPE", "MEMORY")
            self._store = Factory.get_instance(store_type)
        except KeyError as ex:
            print(ex, file=stderr)
            # one of the required environment variable is not set
            print(
                "One of the required environment variables is not set",
                file=stderr,
            )
            sys.exit(1)
        except Exception as ex:
            print(ex, file=stderr)
            sys.exit(1)

    @property
    def store(self) -> KVStoreBase:
        return self._store

Here is my directory structure:

    url-shortener/
├── Dockerfile
├── LICENSE
├── README.md
├── requirements.txt
├── src
│   ├── __init__.py
│   ├── kvstores
│   │   ├── __init__.py
│   │   ├── file_store.py
│   │   ├── kvstore_base.py
│   │   ├── kvstore_factory.py
│   │   └── memory_store.py
│   ├── main.py
│   ├── models
│   │   ├── __init__.py
│   │   └── urls.py
│   ├── routes
│   │   ├── __init__.py
│   │   └── urls.py
│   └── utils
│       ├── singleton.py
│       └── store_connector.py
├── start.sh
└── tests
    ├── __init__.py
    ├── test_file_store.py
    ├── test_kvstore_factory.py
    ├── test_memory_store.py
    ├── test_routes.py
    └── test_store.py

In the instructions, they emphasized the following points:

  • Readability of code
  • Tests – Unit tests definitely and more if you can think of
  • A good structure to your code and well written file & variable names etc.

So, which of those points my code lacks?