
run our server with TLS #102
Merged (2 commits, Jan 17, 2025)
4 changes: 4 additions & 0 deletions .gitignore

@@ -3,6 +3,10 @@ __pycache__/
 *.py[cod]
 *$py.class
 
+/server/fullchain.pem
+/server/privkey.pem
+/server/cert.pem
+
 # C extensions
 *.so
2 changes: 1 addition & 1 deletion Containerfile

@@ -1,7 +1,7 @@
 FROM fedora:40
 # Fedora's llama-cpp-python is segfaulting on the mistral model we use :/
 RUN dnf install -y fastapi-cli python3-fastapi python3-requests python3-drain3 python3-pip python3-pydantic-settings python3-starlette+full \
-    gcc gcc-c++ python3-scikit-build git-core \
+    gcc gcc-c++ python3-scikit-build git-core python3-gunicorn \
     && dnf clean all
 # the newest 0.2.86 fails to build, it seems vendored llama-cpp is missing in the archive
 RUN pip3 install llama_cpp_python==0.2.85 sse-starlette starlette-context \
3 changes: 2 additions & 1 deletion docker-compose-prod.yaml

@@ -31,4 +31,5 @@ services:
       - "${LOGDETECTIVE_SERVER_PORT:-8080}:${LOGDETECTIVE_SERVER_PORT:-8080}"
     env_file: .env
     # --no-reload: doesn't work in a container - `PermissionError: Permission denied (os error 13) about ["/proc"]`
-    command: fastapi run /src/logdetective/server.py --host 0.0.0.0 --port $LOGDETECTIVE_SERVER_PORT --no-reload
+    # command: fastapi run /src/logdetective/server.py --host 0.0.0.0 --port $LOGDETECTIVE_SERVER_PORT --no-reload
+    command: ["gunicorn", "-c", "/src/server/gunicorn-prod.config.py", "logdetective.server:app"]
4 changes: 3 additions & 1 deletion docker-compose.yaml

@@ -34,4 +34,6 @@ services:
       - "${LOGDETECTIVE_SERVER_PORT:-8080}:${LOGDETECTIVE_SERVER_PORT:-8080}"
     env_file: .env
     # --no-reload: doesn't work in a container - `PermissionError: Permission denied (os error 13) about ["/proc"]`
-    command: fastapi dev /src/logdetective/server.py --host 0.0.0.0 --port $LOGDETECTIVE_SERVER_PORT --no-reload
+    # command: fastapi dev /src/logdetective/server.py --host 0.0.0.0 --port $LOGDETECTIVE_SERVER_PORT --no-reload
+    # timeout set to 240 - 4 minutes should be enough for one LLM execution locally on a CPU
+    command: ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--timeout", "240", "logdetective.server:app", "-b", "0.0.0.0:$LOGDETECTIVE_SERVER_PORT"]
11 changes: 11 additions & 0 deletions server/gunicorn-prod.config.py

@@ -0,0 +1,11 @@
+import os
+bind = f"0.0.0.0:{os.environ['LOGDETECTIVE_SERVER_PORT']}"
+worker_class = "uvicorn.workers.UvicornWorker"
+workers = 2
+# timeout set to 120 - 2 minutes should be enough for one LLM execution in production on a GPU
+timeout = 120
+# write to stdout
+accesslog = '-'
+certfile = "/src/server/cert.pem"
+keyfile = "/src/server/privkey.pem"
+ca_certs = "/src/server/fullchain.pem"
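The three PEM paths follow the usual certbot/Let's Encrypt layout (`privkey.pem` for the key, `cert.pem` for the leaf certificate, `fullchain.pem` for the chain), and per the .gitignore change above they stay out of the repository. A hedged way to confirm the workers actually terminate TLS, using only the standard library; the host name, the 8080 default, and relaxing `check_hostname` for localhost are assumptions for illustration:

    # Sketch only: open a TLS connection and print the negotiated details.
    # Trusting fullchain.pem mirrors the config above; "localhost" will not
    # match the certificate's SAN, hence the check_hostname relaxation.
    import os
    import socket
    import ssl

    port = int(os.environ.get("LOGDETECTIVE_SERVER_PORT", "8080"))
    ctx = ssl.create_default_context(cafile="/src/server/fullchain.pem")
    ctx.check_hostname = False
    with socket.create_connection(("localhost", port)) as sock:
        with ctx.wrap_socket(sock, server_hostname="localhost") as tls:
            print(tls.version(), tls.cipher())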