-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranslate.py
120 lines (97 loc) · 4.11 KB
/
translate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import pandas as pd
import requests
import time
import pickle
import pygame.mixer
import datetime
import re # Import regular expressions
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
def timedelta_to_str(td):
    """Convert a timedelta into a compact string such as ``1d 02h 03m 04s``.

    The day and hour components are emitted only when they are non-zero;
    minutes and seconds are always present and zero-padded to two digits.
    Fractional seconds are truncated toward zero. A negative duration is
    rendered as its magnitude prefixed with ``-`` (e.g. ``-01m 30s``).

    Args:
        td: The ``datetime.timedelta`` to format.

    Returns:
        The formatted duration string.
    """
    # Work on whole seconds so divmod stays in integer arithmetic. Formatting
    # the magnitude avoids the floor-division artefacts divmod produces for
    # negative values (a negative day count with a positive remainder).
    total_seconds = int(td.total_seconds())
    sign = "-" if total_seconds < 0 else ""
    total_seconds = abs(total_seconds)

    days, remainder = divmod(total_seconds, 86400)  # 86400 seconds in a day
    hours, remainder = divmod(remainder, 3600)  # 3600 seconds in an hour
    minutes, seconds = divmod(remainder, 60)

    parts = []
    if days > 0:
        parts.append(f"{days}d")
    if hours > 0:
        parts.append(f"{hours:02}h")
    parts.append(f"{minutes:02}m {seconds:02}s")
    return sign + ' '.join(parts)
# Compiled once at import time because should_skip_translation is called for
# every cell. Matches one or more digits, a single whitespace character and a
# capital "V" (e.g. "230 V").
_SKIP_TRANSLATION_PATTERN = re.compile(r'^\d+\sV$')


def should_skip_translation(text):
    """Check if the text matches a pattern that should not be translated.

    Args:
        text: The cell value to test; non-string values are converted with
            ``str()`` before matching.

    Returns:
        True if the text consists of digits, one whitespace character and a
        capital ``V`` (for example ``"230 V"``), otherwise False.
    """
    if not isinstance(text, str):
        text = str(text)
    return _SKIP_TRANSLATION_PATTERN.match(text) is not None
def col_num_to_letter(col_num):
    """Convert a 1-based column number into its Excel-style column letters.

    Args:
        col_num: The 1-based column number (1 -> ``A``, 26 -> ``Z``,
            27 -> ``AA``).

    Returns:
        The column letters, or an empty string when ``col_num`` is zero or
        negative.
    """
    letter = ''
    # Excel columns are a bijective base-26 numbering (there is no zero
    # digit), hence the "- 1" before each divmod. The explicit "> 0" guard
    # matters: with a bare truthiness test a negative input never reaches 0
    # and the loop would spin forever.
    while col_num > 0:
        col_num, remainder = divmod(col_num - 1, 26)
        letter = chr(65 + remainder) + letter  # 65 == ord('A')
    return letter
# Load the source workbook. read_excel uses the first sheet row as the column
# header, so DataFrame row 0 corresponds to spreadsheet row 2.
df = pd.read_excel('tlumacztest.xlsx', engine='openpyxl')

# Work on an object-typed copy so translated strings can be written into
# columns that pandas originally inferred as numeric.
df_translated = df.copy().astype('object')

# Set up Google Cloud Translation API endpoint and key
endpoint = "https://translation.googleapis.com/language/translate/v2"
api_key = "YOUR_API_KEY"  # Replace with your actual API key

# Create a session that retries transient server errors with backoff
session = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
session.mount('https://', HTTPAdapter(max_retries=retries))

# Try to load the cache written by a previous run.
# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file; only use a cache file that this script produced itself.
try:
    with open('translation_cache.pkl', 'rb') as f:
        translation_cache = pickle.load(f)
except FileNotFoundError:
    translation_cache = {}  # If file not found, initialize an empty cache

# Get total number of rows and columns in the dataframe
total_rows, total_columns = df.shape
total_cells = total_rows * total_columns

# Track start time (used for the remaining-time estimate)
start_time = datetime.datetime.now()

try:
    # NOTE(review): both ranges start at 1, so the first data row and the
    # first column are never translated. The header row is already consumed
    # by read_excel -- confirm that skipping index 0 is intentional.
    for row_idx in range(1, total_rows):
        for col_idx in range(1, total_columns):
            cell_content = df.iat[row_idx, col_idx]

            # Nothing to translate in empty cells
            if pd.isna(cell_content) or cell_content == '':
                continue
            if should_skip_translation(str(cell_content)):
                continue

            if cell_content in translation_cache:
                translation = translation_cache[cell_content]
            else:
                data = {
                    'q': cell_content,
                    'source': 'pl',
                    'target': 'no',
                    # The API defaults to HTML output, which returns
                    # entities such as "&#39;"; ask for plain text instead.
                    'format': 'text',
                    'key': api_key
                }
                try:
                    # A timeout keeps a stalled connection from hanging the
                    # whole run.
                    response = session.post(endpoint, data=data, timeout=30)
                    # Surface 4xx/5xx responses (bad key, quota, ...) as
                    # errors instead of failing later on a missing JSON key.
                    response.raise_for_status()
                    translation = response.json()['data']['translations'][0]['translatedText']
                except (requests.exceptions.RequestException,
                        KeyError, IndexError, TypeError, ValueError) as e:
                    # Best effort: leave this cell untranslated and move on.
                    print(f"An error occurred: {e}")
                    continue
                translation_cache[cell_content] = translation
                time.sleep(0.1)  # Throttle only after a real API call

            df_translated.iat[row_idx, col_idx] = translation

            # Progress is estimated from the cell's row-major position.
            cells_processed = (row_idx * total_columns) + col_idx
            percentage_done = (cells_processed / total_cells) * 100
            elapsed_time = datetime.datetime.now() - start_time
            estimated_time_left = (elapsed_time / cells_processed) * (total_cells - cells_processed)
            estimated_time_str = timedelta_to_str(estimated_time_left)

            excel_col = col_num_to_letter(col_idx + 1)
            # +1 for Excel's 1-based rows and +1 for the header row.
            excel_row = row_idx + 2
            print(f"\rTranslating cell {row_idx},{col_idx} ({excel_col}{excel_row}). Progress: {percentage_done:.2f}%. Estimated time left: {estimated_time_str} ", end='')
finally:
    # Persist the cache even when the run is interrupted or fails, so
    # translations that were already paid for are reused next time.
    with open('translation_cache.pkl', 'wb') as f:
        pickle.dump(translation_cache, f)

# Terminate the "\r"-updated progress line
print()

# Save the new Excel file
df_translated.to_excel('your_output_file.xlsx', index=False)