# data_retrieval.py
# Generated from the jstet/poetry_template repository template.
from myrtle.pipelines import get_process_notable_ppl, get_split_quotes, filter_quotes
import modal
from myrtle.console import console
import pandas as pd
# Modal app object named "myrtle_data"; the remote function and the local
# entrypoint below register themselves on it through its decorators.
stub = modal.Stub("myrtle_data")
# Container image handed to the remote function: a Debian slim base with the
# project's Poetry dependencies installed from pyproject.toml.
image = modal.Image.debian_slim().poetry_install_from_file("pyproject.toml")
@stub.function(
    image=image,
    timeout=8000,
    secret=modal.Secret.from_dotenv(__file__),
)
def f3(chunk, names):
    """Filter a single chunk of quotes against ``names`` on Modal.

    This is a thin remote wrapper: it forwards both arguments unchanged to
    ``filter_quotes`` and returns whatever that call returns.

    Args:
        chunk: One element of the iterable produced by ``get_split_quotes``.
        names: The collection of notable names to filter by.

    Returns:
        The result of ``filter_quotes(chunk, names)``.
        NOTE(review): presumably a pandas DataFrame, since the caller feeds
        the results to ``pd.concat`` -- confirm against ``filter_quotes``.
    """
    filtered = filter_quotes(chunk, names)
    return filtered
@stub.local_entrypoint()
def main():
    """Export the notable-people list and filter all quotes against it.

    Steps:
        1. Fetch the notable names via ``get_process_notable_ppl`` and write
           them, one per line, to ``./data/notable.csv``.
        2. Split the quotes with ``get_split_quotes(30)``.
        3. Fan the chunks out to the remote function ``f3`` with ``f3.map``,
           passing the names as a keyword argument to every call.
        4. Concatenate the per-chunk results and write them to
           ``./data/filtered_quotes.csv`` without the index column.

    Both output paths are relative to the current working directory, and the
    ``./data`` directory must already exist.
    """
    notable = get_process_notable_ppl()
    names_text = "\n".join(notable)
    # Explicit UTF-8: without it the file is written in the locale's default
    # encoding, which can fail or garble names containing non-ASCII
    # characters, and would differ from the UTF-8 that pandas uses for the
    # second output file.
    with open("./data/notable.csv", "w", encoding="utf-8") as names_file:
        names_file.write(names_text)

    # NOTE(review): the meaning of 30 (chunk count vs. chunk size) is defined
    # by get_split_quotes -- confirm there.
    chunks = get_split_quotes(30)
    console.log("Starting to filter..")
    # Drain the remote map into a list so pd.concat receives a concrete
    # sequence of results.
    filtered_chunks = list(f3.map(chunks, kwargs={"names": notable}))

    filtered_quotes_df = pd.concat(filtered_chunks)
    filtered_quotes_df.to_csv("./data/filtered_quotes.csv", index=False)